@inproceedings{thulke-etal-2025-listen,
title = "Listen to the Context: Towards Faithful Large Language Models for Retrieval Augmented Generation on Climate Questions",
author = "Thulke, David and
Kemmler, Jakob and
Dugast, Christian and
Ney, Hermann",
editor = "Dutia, Kalyan and
Henderson, Peter and
Leippold, Markus and
Manning, Christopher and
Morio, Gaku and
Muccione, Veruska and
Ni, Jingwei and
Schimanski, Tobias and
Stammbach, Dominik and
Singh, Alok and
Su, Alba (Ruiran) and
A. Vaghefi, Saeid",
booktitle = "Proceedings of the 2nd Workshop on Natural Language Processing Meets Climate Change (ClimateNLP 2025)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.climatenlp-1.17/",
doi = "10.18653/v1/2025.climatenlp-1.17",
pages = "245--259",
ISBN = "979-8-89176-259-6",
abstract = "Large language models that use retrieval augmented generation have the potential to unlock valuable knowledge for researchers, policymakers, and the public by making long and technical climate-related documents more accessible. While this approach can help alleviate factual hallucinations by relying on retrieved passages as additional context, its effectiveness depends on whether the model{'}s output remains faithful to these passages. To address this, we explore the automatic assessment of faithfulness of different models in this setting. We then focus on ClimateGPT, a large language model specialised in climate science, to examine which factors in its instruction fine-tuning impact the model{'}s faithfulness. By excluding unfaithful subsets of the model{'}s training data, we develop ClimateGPT Faithful+, which achieves an improvement in faithfulness from 30{\%} to 57{\%} in supported atomic claims according to our automatic metric."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="thulke-etal-2025-listen">
<titleInfo>
<title>Listen to the Context: Towards Faithful Large Language Models for Retrieval Augmented Generation on Climate Questions</title>
</titleInfo>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Thulke</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jakob</namePart>
<namePart type="family">Kemmler</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christian</namePart>
<namePart type="family">Dugast</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hermann</namePart>
<namePart type="family">Ney</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2nd Workshop on Natural Language Processing Meets Climate Change (ClimateNLP 2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kalyan</namePart>
<namePart type="family">Dutia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Peter</namePart>
<namePart type="family">Henderson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Markus</namePart>
<namePart type="family">Leippold</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christopher</namePart>
<namePart type="family">Manning</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gaku</namePart>
<namePart type="family">Morio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veruska</namePart>
<namePart type="family">Muccione</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jingwei</namePart>
<namePart type="family">Ni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tobias</namePart>
<namePart type="family">Schimanski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dominik</namePart>
<namePart type="family">Stammbach</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alok</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alba</namePart>
<namePart type="given">(Ruiran)</namePart>
<namePart type="family">Su</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Saeid</namePart>
<namePart type="family">A. Vaghefi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-259-6</identifier>
</relatedItem>
<abstract>Large language models that use retrieval augmented generation have the potential to unlock valuable knowledge for researchers, policymakers, and the public by making long and technical climate-related documents more accessible. While this approach can help alleviate factual hallucinations by relying on retrieved passages as additional context, its effectiveness depends on whether the model’s output remains faithful to these passages. To address this, we explore the automatic assessment of faithfulness of different models in this setting. We then focus on ClimateGPT, a large language model specialised in climate science, to examine which factors in its instruction fine-tuning impact the model’s faithfulness. By excluding unfaithful subsets of the model’s training data, we develop ClimateGPT Faithful+, which achieves an improvement in faithfulness from 30% to 57% in supported atomic claims according to our automatic metric.</abstract>
<identifier type="citekey">thulke-etal-2025-listen</identifier>
<identifier type="doi">10.18653/v1/2025.climatenlp-1.17</identifier>
<location>
<url>https://aclanthology.org/2025.climatenlp-1.17/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>245</start>
<end>259</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Listen to the Context: Towards Faithful Large Language Models for Retrieval Augmented Generation on Climate Questions
%A Thulke, David
%A Kemmler, Jakob
%A Dugast, Christian
%A Ney, Hermann
%Y Dutia, Kalyan
%Y Henderson, Peter
%Y Leippold, Markus
%Y Manning, Christopher
%Y Morio, Gaku
%Y Muccione, Veruska
%Y Ni, Jingwei
%Y Schimanski, Tobias
%Y Stammbach, Dominik
%Y Singh, Alok
%Y Su, Alba (Ruiran)
%Y A. Vaghefi, Saeid
%S Proceedings of the 2nd Workshop on Natural Language Processing Meets Climate Change (ClimateNLP 2025)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-259-6
%F thulke-etal-2025-listen
%X Large language models that use retrieval augmented generation have the potential to unlock valuable knowledge for researchers, policymakers, and the public by making long and technical climate-related documents more accessible. While this approach can help alleviate factual hallucinations by relying on retrieved passages as additional context, its effectiveness depends on whether the model’s output remains faithful to these passages. To address this, we explore the automatic assessment of faithfulness of different models in this setting. We then focus on ClimateGPT, a large language model specialised in climate science, to examine which factors in its instruction fine-tuning impact the model’s faithfulness. By excluding unfaithful subsets of the model’s training data, we develop ClimateGPT Faithful+, which achieves an improvement in faithfulness from 30% to 57% in supported atomic claims according to our automatic metric.
%R 10.18653/v1/2025.climatenlp-1.17
%U https://aclanthology.org/2025.climatenlp-1.17/
%U https://doi.org/10.18653/v1/2025.climatenlp-1.17
%P 245-259
Markdown (Informal)
[Listen to the Context: Towards Faithful Large Language Models for Retrieval Augmented Generation on Climate Questions](https://aclanthology.org/2025.climatenlp-1.17/) (Thulke et al., ClimateNLP 2025)
ACL