@article{lyu-etal-2024-towards,
title = "Towards Faithful Model Explanation in {NLP}: A Survey",
author = "Lyu, Qing and
Apidianaki, Marianna and
Callison-Burch, Chris",
journal = "Computational Linguistics",
volume = "50",
number = "2",
month = jun,
year = "2024",
address = "Cambridge, MA",
publisher = "MIT Press",
url = "https://aclanthology.org/2024.cl-2.6",
doi = "10.1162/coli_a_00511",
pages = "657--723",
abstract = "End-to-end neural Natural Language Processing (NLP) models are notoriously difficult to understand. This has given rise to numerous efforts towards model explainability in recent years. One desideratum of model explanation is faithfulness, that is, an explanation should accurately represent the reasoning process behind the model{'}s prediction. In this survey, we review over 110 model explanation methods in NLP through the lens of faithfulness. We first discuss the definition and evaluation of faithfulness, as well as its significance for explainability. We then introduce recent advances in faithful explanation, grouping existing approaches into five categories: similarity-based methods, analysis of model-internal structures, backpropagation-based methods, counterfactual intervention, and self-explanatory models. For each category, we synthesize its representative studies, strengths, and weaknesses. Finally, we summarize their common virtues and remaining challenges, and reflect on future work directions towards faithful explainability in NLP.",
}

<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="lyu-etal-2024-towards">
    <titleInfo>
        <title>Towards Faithful Model Explanation in NLP: A Survey</title>
    </titleInfo>
    <name type="personal">
        <namePart type="given">Qing</namePart>
        <namePart type="family">Lyu</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Marianna</namePart>
        <namePart type="family">Apidianaki</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Chris</namePart>
        <namePart type="family">Callison-Burch</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <originInfo>
        <dateIssued>2024-06</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <genre authority="bibutilsgt">journal article</genre>
    <relatedItem type="host">
        <titleInfo>
            <title>Computational Linguistics</title>
        </titleInfo>
        <originInfo>
            <issuance>continuing</issuance>
            <publisher>MIT Press</publisher>
            <place>
                <placeTerm type="text">Cambridge, MA</placeTerm>
            </place>
        </originInfo>
        <genre authority="marcgt">periodical</genre>
        <genre authority="bibutilsgt">academic journal</genre>
    </relatedItem>
    <abstract>End-to-end neural Natural Language Processing (NLP) models are notoriously difficult to understand. This has given rise to numerous efforts towards model explainability in recent years. One desideratum of model explanation is faithfulness, that is, an explanation should accurately represent the reasoning process behind the model’s prediction. In this survey, we review over 110 model explanation methods in NLP through the lens of faithfulness. We first discuss the definition and evaluation of faithfulness, as well as its significance for explainability. We then introduce recent advances in faithful explanation, grouping existing approaches into five categories: similarity-based methods, analysis of model-internal structures, backpropagation-based methods, counterfactual intervention, and self-explanatory models. For each category, we synthesize its representative studies, strengths, and weaknesses. Finally, we summarize their common virtues and remaining challenges, and reflect on future work directions towards faithful explainability in NLP.</abstract>
    <identifier type="citekey">lyu-etal-2024-towards</identifier>
    <identifier type="doi">10.1162/coli_a_00511</identifier>
    <location>
        <url>https://aclanthology.org/2024.cl-2.6</url>
    </location>
    <part>
        <date>2024-06</date>
        <detail type="volume"><number>50</number></detail>
        <detail type="issue"><number>2</number></detail>
        <extent unit="page">
            <start>657</start>
            <end>723</end>
        </extent>
    </part>
</mods>
</modsCollection>

%0 Journal Article
%T Towards Faithful Model Explanation in NLP: A Survey
%A Lyu, Qing
%A Apidianaki, Marianna
%A Callison-Burch, Chris
%J Computational Linguistics
%D 2024
%8 June
%V 50
%N 2
%I MIT Press
%C Cambridge, MA
%F lyu-etal-2024-towards
%X End-to-end neural Natural Language Processing (NLP) models are notoriously difficult to understand. This has given rise to numerous efforts towards model explainability in recent years. One desideratum of model explanation is faithfulness, that is, an explanation should accurately represent the reasoning process behind the model’s prediction. In this survey, we review over 110 model explanation methods in NLP through the lens of faithfulness. We first discuss the definition and evaluation of faithfulness, as well as its significance for explainability. We then introduce recent advances in faithful explanation, grouping existing approaches into five categories: similarity-based methods, analysis of model-internal structures, backpropagation-based methods, counterfactual intervention, and self-explanatory models. For each category, we synthesize its representative studies, strengths, and weaknesses. Finally, we summarize their common virtues and remaining challenges, and reflect on future work directions towards faithful explainability in NLP.
%R 10.1162/coli_a_00511
%U https://aclanthology.org/2024.cl-2.6
%U https://doi.org/10.1162/coli_a_00511
%P 657-723

Markdown (Informal)
[Towards Faithful Model Explanation in NLP: A Survey](https://aclanthology.org/2024.cl-2.6) (Lyu et al., CL 2024)

ACL
Qing Lyu, Marianna Apidianaki, and Chris Callison-Burch. 2024. Towards Faithful Model Explanation in NLP: A Survey. Computational Linguistics, 50(2):657–723.