@inproceedings{lamsiyah-etal-2025-trust,
title = "Trust but Verify: A Comprehensive Survey of Faithfulness Evaluation Methods in Abstractive Text Summarization",
author = "Lamsiyah, Salima and
Nourbakhsh, Aria and
Schommer, Christoph",
editor = "Angelova, Galia and
Kunilovskaya, Maria and
Escribe, Marie and
Mitkov, Ruslan",
booktitle = "Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era",
month = sep,
year = "2025",
address = "Varna, Bulgaria",
publisher = "INCOMA Ltd., Shoumen, Bulgaria",
url = "https://aclanthology.org/2025.ranlp-1.74/",
pages = "633--643",
    abstract = "Abstractive text summarization systems have advanced significantly with the rise of neural language models. However, they frequently suffer from issues of unfaithfulness or factual inconsistency, generating content that is not verifiably supported by the source text. This survey provides a comprehensive review of over 40 studies published between 2020 and 2025 on methods for evaluating faithfulness in abstractive summarization. We present a unified taxonomy that covers human evaluation techniques and a variety of automatic metrics, including question answering (QA)-based methods, natural language inference (NLI)-based methods, graph-based approaches, and large language model (LLM)-based evaluation. We also discuss meta-evaluation protocols that assess the quality of these metrics. In addition, we analyze a wide range of benchmark datasets, highlighting their design, scope, and relevance to emerging challenges such as long-document and domain-specific summarization. Furthermore, we identify critical limitations in current evaluation practices, including poor alignment with human judgment, limited robustness, and inefficiencies in handling complex summaries. We conclude by outlining future directions to support the development of more reliable, interpretable, and scalable evaluation methods. This work aims to support researchers in navigating the rapidly evolving landscape of faithfulness evaluation in summarization."
}