@inproceedings{deng-etal-2025-beyond,
title = "Beyond Binary Preferences: Semi-Online Label-Free {GRACE}-{KTO} with Group-Wise Adaptive Calibration for High-Quality Long-Text Generation",
author = "Deng, Jingyang and
Chen, Ran and
Cheng, Jo-Ku and
Ma, Jinwen",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2025",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-emnlp.951/",
pages = "17550--17562",
ISBN = "979-8-89176-335-7",
abstract = "Generating high-quality long-text remains challenging for Large Language Models (LLMs), as conventional supervised fine-tuning fails to ensure overall quality due to its teacher-forcing nature. Kahneman-Tversky Optimization (KTO), as a model alignment method that can holistically optimize generation quality, overcomes the need for paired preference data required by previous methods. However, it still suffers from binary supervision that inadequately reflects varying quality degrees. To address this, we propose GRACE-KTO, a semi-online framework that transforms KTO{'}s binary signals into dynamically calibrated intra-group rewards. Specifically, GRACE-KTO aggregates responses to identical queries into groups, computes rank-sum scores across multiple linguistic quality dimensions, and applies group-wise and global normalization to adaptively redistribute sample importance. We adopt a semi-online training strategy to reduce costly online sampling while outperforming offline variants. By leveraging query generation with seed data, we minimize labeled data dependency, using the model{'}s own knowledge to enhance its long-text generation capabilities. Additionally, we extend the context window to 32k tokens using YaRN during inference, enabling the model to generate longer texts while maintaining perplexities. Experiments demonstrate GRACE-KTO{'}s superiority over vanilla KTO on both automatic metrics and LLM-as-a-Judge evaluations, advancing long-text generation through group-wise adaptive calibration."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="deng-etal-2025-beyond">
<titleInfo>
<title>Beyond Binary Preferences: Semi-Online Label-Free GRACE-KTO with Group-Wise Adaptive Calibration for High-Quality Long-Text Generation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jingyang</namePart>
<namePart type="family">Deng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ran</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jo-Ku</namePart>
<namePart type="family">Cheng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jinwen</namePart>
<namePart type="family">Ma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2025</title>
</titleInfo>
<name type="personal">
<namePart type="given">Christos</namePart>
<namePart type="family">Christodoulopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tanmoy</namePart>
<namePart type="family">Chakraborty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carolyn</namePart>
<namePart type="family">Rose</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Violet</namePart>
<namePart type="family">Peng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-335-7</identifier>
</relatedItem>
<abstract>Generating high-quality long-text remains challenging for Large Language Models (LLMs), as conventional supervised fine-tuning fails to ensure overall quality due to its teacher-forcing nature. Kahneman-Tversky Optimization (KTO), as a model alignment method that can holistically optimize generation quality, overcomes the need for paired preference data required by previous methods. However, it still suffers from binary supervision that inadequately reflects varying quality degrees. To address this, we propose GRACE-KTO, a semi-online framework that transforms KTO’s binary signals into dynamically calibrated intra-group rewards. Specifically, GRACE-KTO aggregates responses to identical queries into groups, computes rank-sum scores across multiple linguistic quality dimensions, and applies group-wise and global normalization to adaptively redistribute sample importance. We adopt a semi-online training strategy to reduce costly online sampling while outperforming offline variants. By leveraging query generation with seed data, we minimize labeled data dependency, using the model’s own knowledge to enhance its long-text generation capabilities. Additionally, we extend the context window to 32k tokens using YaRN during inference, enabling the model to generate longer texts while maintaining perplexities. Experiments demonstrate GRACE-KTO’s superiority over vanilla KTO on both automatic metrics and LLM-as-a-Judge evaluations, advancing long-text generation through group-wise adaptive calibration.</abstract>
<identifier type="citekey">deng-etal-2025-beyond</identifier>
<location>
<url>https://aclanthology.org/2025.findings-emnlp.951/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>17550</start>
<end>17562</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Beyond Binary Preferences: Semi-Online Label-Free GRACE-KTO with Group-Wise Adaptive Calibration for High-Quality Long-Text Generation
%A Deng, Jingyang
%A Chen, Ran
%A Cheng, Jo-Ku
%A Ma, Jinwen
%Y Christodoulopoulos, Christos
%Y Chakraborty, Tanmoy
%Y Rose, Carolyn
%Y Peng, Violet
%S Findings of the Association for Computational Linguistics: EMNLP 2025
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-335-7
%F deng-etal-2025-beyond
%X Generating high-quality long-text remains challenging for Large Language Models (LLMs), as conventional supervised fine-tuning fails to ensure overall quality due to its teacher-forcing nature. Kahneman-Tversky Optimization (KTO), as a model alignment method that can holistically optimize generation quality, overcomes the need for paired preference data required by previous methods. However, it still suffers from binary supervision that inadequately reflects varying quality degrees. To address this, we propose GRACE-KTO, a semi-online framework that transforms KTO’s binary signals into dynamically calibrated intra-group rewards. Specifically, GRACE-KTO aggregates responses to identical queries into groups, computes rank-sum scores across multiple linguistic quality dimensions, and applies group-wise and global normalization to adaptively redistribute sample importance. We adopt a semi-online training strategy to reduce costly online sampling while outperforming offline variants. By leveraging query generation with seed data, we minimize labeled data dependency, using the model’s own knowledge to enhance its long-text generation capabilities. Additionally, we extend the context window to 32k tokens using YaRN during inference, enabling the model to generate longer texts while maintaining perplexities. Experiments demonstrate GRACE-KTO’s superiority over vanilla KTO on both automatic metrics and LLM-as-a-Judge evaluations, advancing long-text generation through group-wise adaptive calibration.
%U https://aclanthology.org/2025.findings-emnlp.951/
%P 17550-17562
Markdown (Informal)
[Beyond Binary Preferences: Semi-Online Label-Free GRACE-KTO with Group-Wise Adaptive Calibration for High-Quality Long-Text Generation](https://aclanthology.org/2025.findings-emnlp.951/) (Deng et al., Findings 2025)
ACL
Jingyang Deng, Ran Chen, Jo-Ku Cheng, and Jinwen Ma. 2025. [Beyond Binary Preferences: Semi-Online Label-Free GRACE-KTO with Group-Wise Adaptive Calibration for High-Quality Long-Text Generation](https://aclanthology.org/2025.findings-emnlp.951/). In *Findings of the Association for Computational Linguistics: EMNLP 2025*, pages 17550–17562, Suzhou, China. Association for Computational Linguistics.
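
The abstract describes the core calibration step only at a high level: responses to the same query are grouped, each response receives a rank-sum score over several linguistic quality dimensions, and the scores are normalized group-wise and then globally to replace KTO's binary labels with graded rewards. The sketch below is a minimal, hypothetical illustration of that idea under our own assumptions; the function names, grouping format, and normalization details are not taken from the paper and are not the authors' implementation.

```python
import numpy as np

def rank_sum_scores(dim_scores: np.ndarray) -> np.ndarray:
    """dim_scores: (n_responses, n_dimensions) raw quality scores for one group
    of responses to the same query. Returns each response's sum of within-dimension
    ranks (higher raw score -> higher rank); ties are broken arbitrarily here."""
    ranks = dim_scores.argsort(axis=0).argsort(axis=0)  # 0-based ranks per dimension
    return ranks.sum(axis=1).astype(float)

def calibrate(groups: list[np.ndarray]) -> list[np.ndarray]:
    """Group-wise normalization of rank-sum scores followed by a global rescaling,
    yielding graded rewards in place of binary desirable/undesirable labels.
    This is an illustrative stand-in, not the paper's exact procedure."""
    group_scores = []
    for g in groups:
        s = rank_sum_scores(g)
        # group-wise normalization: zero mean, unit variance within the group
        s = (s - s.mean()) / (s.std() + 1e-8)
        group_scores.append(s)
    # global normalization so rewards are comparable across groups
    flat = np.concatenate(group_scores)
    mu, sigma = flat.mean(), flat.std() + 1e-8
    return [(s - mu) / sigma for s in group_scores]

# Example: two queries, each with 3 sampled responses scored on 4 dimensions.
rng = np.random.default_rng(0)
rewards = calibrate([rng.random((3, 4)), rng.random((3, 4))])
print(rewards)
```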