@inproceedings{zhang-etal-2025-cot,
title = "{C}o{T}-based Synthesizer: Enhancing {LLM} Performance through Answer Synthesis",
author = "Zhang, Bohan and
Zhang, Xiaokang and
Zhang, Jing and
Yu, Jifan and
Luo, Sijia and
Tang, Jie",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.acl-long.315/",
doi = "10.18653/v1/2025.acl-long.315",
pages = "6286--6303",
ISBN = "979-8-89176-251-0",
abstract = "Current inference scaling methods, such as Self-consistency and Best-of-N, have proven effective in improving the accuracy of LLMs on complex reasoning tasks. However, these methods rely heavily on the quality of candidate responses and are unable to produce correct answers when all candidates are incorrect. In this paper, we propose a novel inference scaling strategy, CoT-based Synthesizer, which leverages CoT reasoning to synthesize superior answers by analyzing complementary information from multiple candidate responses, even when all candidates are flawed. To support a lightweight and cost-effective implementation, we introduce an automated data generation pipeline that creates diverse training data. This enables smaller LLMs trained on this data to improve the inference accuracy of larger models, including API-based LLMs. Experimental results across four benchmark datasets with seven policy models demonstrate that our method significantly enhances performance, with gains of 11.8{\%} for Llama3-8B and 10.3{\%} for GPT-4o on the MATH dataset. The corresponding training data and code are publicly available on the [repository](https://github.com/RUCKBReasoning/CoT-based-Synthesizer)."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhang-etal-2025-cot">
<titleInfo>
<title>CoT-based Synthesizer: Enhancing LLM Performance through Answer Synthesis</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bohan</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaokang</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jing</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jifan</namePart>
<namePart type="family">Yu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sijia</namePart>
<namePart type="family">Luo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jie</namePart>
<namePart type="family">Tang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Nabende</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Taher</namePart>
<namePart type="family">Pilehvar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-251-0</identifier>
</relatedItem>
<abstract>Current inference scaling methods, such as Self-consistency and Best-of-N, have proven effective in improving the accuracy of LLMs on complex reasoning tasks. However, these methods rely heavily on the quality of candidate responses and are unable to produce correct answers when all candidates are incorrect. In this paper, we propose a novel inference scaling strategy, CoT-based Synthesizer, which leverages CoT reasoning to synthesize superior answers by analyzing complementary information from multiple candidate responses, even when all candidates are flawed. To support a lightweight and cost-effective implementation, we introduce an automated data generation pipeline that creates diverse training data. This enables smaller LLMs trained on this data to improve the inference accuracy of larger models, including API-based LLMs. Experimental results across four benchmark datasets with seven policy models demonstrate that our method significantly enhances performance, with gains of 11.8% for Llama3-8B and 10.3% for GPT-4o on the MATH dataset. The corresponding training data and code are publicly available on the [repository](https://github.com/RUCKBReasoning/CoT-based-Synthesizer).</abstract>
<identifier type="citekey">zhang-etal-2025-cot</identifier>
<identifier type="doi">10.18653/v1/2025.acl-long.315</identifier>
<location>
<url>https://aclanthology.org/2025.acl-long.315/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>6286</start>
<end>6303</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T CoT-based Synthesizer: Enhancing LLM Performance through Answer Synthesis
%A Zhang, Bohan
%A Zhang, Xiaokang
%A Zhang, Jing
%A Yu, Jifan
%A Luo, Sijia
%A Tang, Jie
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-251-0
%F zhang-etal-2025-cot
%X Current inference scaling methods, such as Self-consistency and Best-of-N, have proven effective in improving the accuracy of LLMs on complex reasoning tasks. However, these methods rely heavily on the quality of candidate responses and are unable to produce correct answers when all candidates are incorrect. In this paper, we propose a novel inference scaling strategy, CoT-based Synthesizer, which leverages CoT reasoning to synthesize superior answers by analyzing complementary information from multiple candidate responses, even when all candidates are flawed. To support a lightweight and cost-effective implementation, we introduce an automated data generation pipeline that creates diverse training data. This enables smaller LLMs trained on this data to improve the inference accuracy of larger models, including API-based LLMs. Experimental results across four benchmark datasets with seven policy models demonstrate that our method significantly enhances performance, with gains of 11.8% for Llama3-8B and 10.3% for GPT-4o on the MATH dataset. The corresponding training data and code are publicly available on the [repository](https://github.com/RUCKBReasoning/CoT-based-Synthesizer).
%R 10.18653/v1/2025.acl-long.315
%U https://aclanthology.org/2025.acl-long.315/
%U https://doi.org/10.18653/v1/2025.acl-long.315
%P 6286-6303
Markdown (Informal)
[CoT-based Synthesizer: Enhancing LLM Performance through Answer Synthesis](https://aclanthology.org/2025.acl-long.315/) (Zhang et al., ACL 2025)
ACL