@inproceedings{yang-etal-2025-thinkanswer,
title = "{T}hink{A}nswer Loss: Balancing Semantic Similarity and Exact Matching for {LLM} Reasoning Enhancement",
author = "Yang, Shan and
Wu, Kun and
Li, Zeju and
Zhang, Linlin and
Pei, Xiangyu and
An, Leike and
Liu, Yu",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2025",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-emnlp.177/",
pages = "3325--3347",
ISBN = "979-8-89176-335-7",
abstract = "Knowledge distillation for large language models often uses Chain-of-Thought (CoT) and answer pairs, but existing methods struggle with appropriate supervision signals. Uniform constraints (e.g., cross-entropy) on CoT can enforce literal, verbose reasoning and suppress expressive diversity, while solely semantic constraints on answers can reduce accuracy in classification tasks. This paper proposes ThinkAnswer Loss, an information-theoretic differential supervision framework that decouples CoT and answer supervision. ThinkAnswer Loss applies semantic similarity constraints to the CoT portion while maintaining strict literal matching for the answer. We theoretically demonstrate its connection to mutual information maximization and derive a tight upper bound on generalization error. Experimental validation on text quality assessment and mathematical reasoning tasks shows that our method maintains answer accuracy while effectively reducing CoT length and preserving semantic content, thereby accelerating inference."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="yang-etal-2025-thinkanswer">
<titleInfo>
<title>ThinkAnswer Loss: Balancing Semantic Similarity and Exact Matching for LLM Reasoning Enhancement</title>
</titleInfo>
<name type="personal">
<namePart type="given">Shan</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kun</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zeju</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Linlin</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiangyu</namePart>
<namePart type="family">Pei</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leike</namePart>
<namePart type="family">An</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yu</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2025</title>
</titleInfo>
<name type="personal">
<namePart type="given">Christos</namePart>
<namePart type="family">Christodoulopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tanmoy</namePart>
<namePart type="family">Chakraborty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carolyn</namePart>
<namePart type="family">Rose</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Violet</namePart>
<namePart type="family">Peng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-335-7</identifier>
</relatedItem>
<abstract>Knowledge distillation for large language models often uses Chain-of-Thought (CoT) and answer pairs, but existing methods struggle with appropriate supervision signals. Uniform constraints (e.g., cross-entropy) on CoT can enforce literal, verbose reasoning and suppress expressive diversity, while solely semantic constraints on answers can reduce accuracy in classification tasks. This paper proposes ThinkAnswer Loss, an information-theoretic differential supervision framework that decouples CoT and answer supervision. ThinkAnswer Loss applies semantic similarity constraints to the CoT portion while maintaining strict literal matching for the answer. We theoretically demonstrate its connection to mutual information maximization and derive a tight upper bound on generalization error. Experimental validation on text quality assessment and mathematical reasoning tasks shows that our method maintains answer accuracy while effectively reducing CoT length and preserving semantic content, thereby accelerating inference.</abstract>
<identifier type="citekey">yang-etal-2025-thinkanswer</identifier>
<location>
<url>https://aclanthology.org/2025.findings-emnlp.177/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>3325</start>
<end>3347</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T ThinkAnswer Loss: Balancing Semantic Similarity and Exact Matching for LLM Reasoning Enhancement
%A Yang, Shan
%A Wu, Kun
%A Li, Zeju
%A Zhang, Linlin
%A Pei, Xiangyu
%A An, Leike
%A Liu, Yu
%Y Christodoulopoulos, Christos
%Y Chakraborty, Tanmoy
%Y Rose, Carolyn
%Y Peng, Violet
%S Findings of the Association for Computational Linguistics: EMNLP 2025
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-335-7
%F yang-etal-2025-thinkanswer
%X Knowledge distillation for large language models often uses Chain-of-Thought (CoT) and answer pairs, but existing methods struggle with appropriate supervision signals. Uniform constraints (e.g., cross-entropy) on CoT can enforce literal, verbose reasoning and suppress expressive diversity, while solely semantic constraints on answers can reduce accuracy in classification tasks. This paper proposes ThinkAnswer Loss, an information-theoretic differential supervision framework that decouples CoT and answer supervision. ThinkAnswer Loss applies semantic similarity constraints to the CoT portion while maintaining strict literal matching for the answer. We theoretically demonstrate its connection to mutual information maximization and derive a tight upper bound on generalization error. Experimental validation on text quality assessment and mathematical reasoning tasks shows that our method maintains answer accuracy while effectively reducing CoT length and preserving semantic content, thereby accelerating inference.
%U https://aclanthology.org/2025.findings-emnlp.177/
%P 3325-3347
Markdown (Informal)
[ThinkAnswer Loss: Balancing Semantic Similarity and Exact Matching for LLM Reasoning Enhancement](https://aclanthology.org/2025.findings-emnlp.177/) (Yang et al., Findings 2025)
ACL