@inproceedings{yadav-etal-2025-investigating,
title = "Investigating the effectiveness of length based rewards in {DPO} for building Conversational Financial Question Answering Systems",
author = "Yadav, Anushka and
Rallabandi, Sai Krishna and
Dakle, Parag Pravin and
Raghavan, Preethi",
editor = "Chen, Chung-Chi and
Moreno-Sandoval, Antonio and
Huang, Jimin and
Xie, Qianqian and
Ananiadou, Sophia and
Chen, Hsin-Hsi",
booktitle = "Proceedings of the Joint Workshop of the 9th Financial Technology and Natural Language Processing (FinNLP), the 6th Financial Narrative Processing (FNP), and the 1st Workshop on Large Language Models for Finance and Legal (LLMFinLegal)",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.finnlp-1.12/",
pages = "134--140",
abstract = "In this paper, we address the numerical reasoning challenges of financial question-answering systems. We propose a two-stage approach where models first generate intermediate calculations and then produce the final answer. We perform two set of experiments to evaluate the performance of our approach. In the first, we compare single-step and multi-step approaches, demonstrating that incorporating intermediate calculations significantly improves numerical accuracy. In the second experiment, we compare traditional DPO and iterative DPO (iDPO) with length-regularized DPO. We show that while traditional DPO reduced parsing errors, it introduces verbosity; iDPO improves reasoning iteratively but faces diminishing returns. On the other hand, Length-regularized DPO reduces verbosity of intermediate calculation as well as enhances numerical accuracy across all models. These results highlight the potential of combining intermediate reasoning steps with domain-specific optimizations to build robust financial question-answering systems."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="yadav-etal-2025-investigating">
<titleInfo>
<title>Investigating the effectiveness of length based rewards in DPO for building Conversational Financial Question Answering Systems</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anushka</namePart>
<namePart type="family">Yadav</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sai</namePart>
<namePart type="given">Krishna</namePart>
<namePart type="family">Rallabandi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Parag</namePart>
<namePart type="given">Pravin</namePart>
<namePart type="family">Dakle</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Preethi</namePart>
<namePart type="family">Raghavan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Joint Workshop of the 9th Financial Technology and Natural Language Processing (FinNLP), the 6th Financial Narrative Processing (FNP), and the 1st Workshop on Large Language Models for Finance and Legal (LLMFinLegal)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Chung-Chi</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Antonio</namePart>
<namePart type="family">Moreno-Sandoval</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jimin</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Qianqian</namePart>
<namePart type="family">Xie</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sophia</namePart>
<namePart type="family">Ananiadou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hsin-Hsi</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we address the numerical reasoning challenges of financial question-answering systems. We propose a two-stage approach where models first generate intermediate calculations and then produce the final answer. We perform two set of experiments to evaluate the performance of our approach. In the first, we compare single-step and multi-step approaches, demonstrating that incorporating intermediate calculations significantly improves numerical accuracy. In the second experiment, we compare traditional DPO and iterative DPO (iDPO) with length-regularized DPO. We show that while traditional DPO reduced parsing errors, it introduces verbosity; iDPO improves reasoning iteratively but faces diminishing returns. On the other hand, Length-regularized DPO reduces verbosity of intermediate calculation as well as enhances numerical accuracy across all models. These results highlight the potential of combining intermediate reasoning steps with domain-specific optimizations to build robust financial question-answering systems.</abstract>
<identifier type="citekey">yadav-etal-2025-investigating</identifier>
<location>
<url>https://aclanthology.org/2025.finnlp-1.12/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>134</start>
<end>140</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Investigating the effectiveness of length based rewards in DPO for building Conversational Financial Question Answering Systems
%A Yadav, Anushka
%A Rallabandi, Sai Krishna
%A Dakle, Parag Pravin
%A Raghavan, Preethi
%Y Chen, Chung-Chi
%Y Moreno-Sandoval, Antonio
%Y Huang, Jimin
%Y Xie, Qianqian
%Y Ananiadou, Sophia
%Y Chen, Hsin-Hsi
%S Proceedings of the Joint Workshop of the 9th Financial Technology and Natural Language Processing (FinNLP), the 6th Financial Narrative Processing (FNP), and the 1st Workshop on Large Language Models for Finance and Legal (LLMFinLegal)
%D 2025
%8 January
%I Association for Computational Linguistics
%C Abu Dhabi, UAE
%F yadav-etal-2025-investigating
%X In this paper, we address the numerical reasoning challenges of financial question-answering systems. We propose a two-stage approach where models first generate intermediate calculations and then produce the final answer. We perform two set of experiments to evaluate the performance of our approach. In the first, we compare single-step and multi-step approaches, demonstrating that incorporating intermediate calculations significantly improves numerical accuracy. In the second experiment, we compare traditional DPO and iterative DPO (iDPO) with length-regularized DPO. We show that while traditional DPO reduced parsing errors, it introduces verbosity; iDPO improves reasoning iteratively but faces diminishing returns. On the other hand, Length-regularized DPO reduces verbosity of intermediate calculation as well as enhances numerical accuracy across all models. These results highlight the potential of combining intermediate reasoning steps with domain-specific optimizations to build robust financial question-answering systems.
%U https://aclanthology.org/2025.finnlp-1.12/
%P 134-140
Markdown (Informal)
[Investigating the effectiveness of length based rewards in DPO for building Conversational Financial Question Answering Systems](https://aclanthology.org/2025.finnlp-1.12/) (Yadav et al., FinNLP 2025)
ACL