% Workshop paper by Niess, Razouk, Mandic and Kern (2025), pages 253--258 of the
% joint FinNLP / FNP / LLMFinLegal workshop proceedings; see the url field.
% Case-sensitive tokens in the title are protected as whole words ({Q\&A},
% {LLMs}) so that styles applying sentence casing keep their capitals.
% NOTE(review): address holds "Abu Dhabi, UAE", which looks like the event
% location rather than the publisher's city -- confirm against the target style.
@inproceedings{niess-etal-2025-addressing,
  title     = {Addressing Hallucination in Causal {Q\&A}: The Efficacy of Fine-tuning over Prompting in {LLMs}},
  author    = {Niess, Georg and
               Razouk, Houssam and
               Mandic, Stasa and
               Kern, Roman},
  editor    = {Chen, Chung-Chi and
               Moreno-Sandoval, Antonio and
               Huang, Jimin and
               Xie, Qianqian and
               Ananiadou, Sophia and
               Chen, Hsin-Hsi},
  booktitle = {Proceedings of the Joint Workshop of the 9th Financial Technology and Natural Language Processing (FinNLP), the 6th Financial Narrative Processing (FNP), and the 1st Workshop on Large Language Models for Finance and Legal (LLMFinLegal)},
  month     = jan,
  year      = {2025},
  address   = {Abu Dhabi, UAE},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.finnlp-1.27/},
  pages     = {253--258},
  abstract  = {This paper presents our approach and findings for participating in the FinCausal 2025 competition, which addresses causal question answering derived from financial documents, specifically English and Spanish annual reports. We investigate the effectiveness of generative models, such as Llama, in contrast to common extractive methods like BERT-based token classification. While prompt optimization and few-shot learning offer some improvements, they were insufficient for consistently outperforming extractive methods in FinCausal, suffering from hallucinations. In contrast, fine-tuning generative models was shown to be essential for minimizing hallucinations and achieving superior performance. Using our fine-tuned multilingual model for both tasks, we outperform our extractive and monolingual approaches, achieving top results for Spanish and second-best for English in the competition. Our findings indicate that fine-tuned large language models are well-suited for causal Q{\&}A from complex financial narratives, offering robust multilingual capabilities and effectively mitigating hallucinations.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!--
  MODS v3 record for the citekey niess-etal-2025-addressing.
  The top-level name elements are the paper's authors; the relatedItem of
  type "host" describes the proceedings volume together with its editors,
  publisher and place. Literal ampersands in text content (the "Q and A"
  abbreviation in the title and abstract) are written as the amp entity
  reference so that the document is well-formed XML.
-->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="niess-etal-2025-addressing">
    <titleInfo>
      <title>Addressing Hallucination in Causal Q&amp;A: The Efficacy of Fine-tuning over Prompting in LLMs</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Georg</namePart>
      <namePart type="family">Niess</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Houssam</namePart>
      <namePart type="family">Razouk</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Stasa</namePart>
      <namePart type="family">Mandic</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Roman</namePart>
      <namePart type="family">Kern</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-01</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume in which the paper appears. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Joint Workshop of the 9th Financial Technology and Natural Language Processing (FinNLP), the 6th Financial Narrative Processing (FNP), and the 1st Workshop on Large Language Models for Finance and Legal (LLMFinLegal)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Chung-Chi</namePart>
        <namePart type="family">Chen</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Antonio</namePart>
        <namePart type="family">Moreno-Sandoval</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jimin</namePart>
        <namePart type="family">Huang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Qianqian</namePart>
        <namePart type="family">Xie</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sophia</namePart>
        <namePart type="family">Ananiadou</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Hsin-Hsi</namePart>
        <namePart type="family">Chen</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Abu Dhabi, UAE</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>This paper presents our approach and findings for participating in the FinCausal 2025 competition, which addresses causal question answering derived from financial documents, specifically English and Spanish annual reports. We investigate the effectiveness of generative models, such as Llama, in contrast to common extractive methods like BERT-based token classification. While prompt optimization and few-shot learning offer some improvements, they were insufficient for consistently outperforming extractive methods in FinCausal, suffering from hallucinations. In contrast, fine-tuning generative models was shown to be essential for minimizing hallucinations and achieving superior performance. Using our fine-tuned multilingual model for both tasks, we outperform our extractive and monolingual approaches, achieving top results for Spanish and second-best for English in the competition. Our findings indicate that fine-tuned large language models are well-suited for causal Q&amp;A from complex financial narratives, offering robust multilingual capabilities and effectively mitigating hallucinations.</abstract>
    <identifier type="citekey">niess-etal-2025-addressing</identifier>
    <location>
      <url>https://aclanthology.org/2025.finnlp-1.27/</url>
    </location>
    <part>
      <date>2025-01</date>
      <extent unit="page">
        <start>253</start>
        <end>258</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Addressing Hallucination in Causal Q&A: The Efficacy of Fine-tuning over Prompting in LLMs
%A Niess, Georg
%A Razouk, Houssam
%A Mandic, Stasa
%A Kern, Roman
%Y Chen, Chung-Chi
%Y Moreno-Sandoval, Antonio
%Y Huang, Jimin
%Y Xie, Qianqian
%Y Ananiadou, Sophia
%Y Chen, Hsin-Hsi
%S Proceedings of the Joint Workshop of the 9th Financial Technology and Natural Language Processing (FinNLP), the 6th Financial Narrative Processing (FNP), and the 1st Workshop on Large Language Models for Finance and Legal (LLMFinLegal)
%D 2025
%8 January
%I Association for Computational Linguistics
%C Abu Dhabi, UAE
%F niess-etal-2025-addressing
%X This paper presents our approach and findings for participating in the FinCausal 2025 competition, which addresses causal question answering derived from financial documents, specifically English and Spanish annual reports. We investigate the effectiveness of generative models, such as Llama, in contrast to common extractive methods like BERT-based token classification. While prompt optimization and few-shot learning offer some improvements, they were insufficient for consistently outperforming extractive methods in FinCausal, suffering from hallucinations. In contrast, fine-tuning generative models was shown to be essential for minimizing hallucinations and achieving superior performance. Using our fine-tuned multilingual model for both tasks, we outperform our extractive and monolingual approaches, achieving top results for Spanish and second-best for English in the competition. Our findings indicate that fine-tuned large language models are well-suited for causal Q&A from complex financial narratives, offering robust multilingual capabilities and effectively mitigating hallucinations.
%U https://aclanthology.org/2025.finnlp-1.27/
%P 253-258
Markdown (Informal)
[Addressing Hallucination in Causal Q&A: The Efficacy of Fine-tuning over Prompting in LLMs](https://aclanthology.org/2025.finnlp-1.27/) (Niess et al., FinNLP 2025)
ACL
- Georg Niess, Houssam Razouk, Stasa Mandic, and Roman Kern. 2025. Addressing Hallucination in Causal Q&A: The Efficacy of Fine-tuning over Prompting in LLMs. In Proceedings of the Joint Workshop of the 9th Financial Technology and Natural Language Processing (FinNLP), the 6th Financial Narrative Processing (FNP), and the 1st Workshop on Large Language Models for Finance and Legal (LLMFinLegal), pages 253–258, Abu Dhabi, UAE. Association for Computational Linguistics.