@inproceedings{barkan-etal-2024-llm,
title = "{LLM} Explainability via Attributive Masking Learning",
author = "Barkan, Oren and
Toib, Yonatan and
Elisha, Yehonatan and
Weill, Jonathan and
Koenigstein, Noam",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2024",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.findings-emnlp.556",
pages = "9522--9537",
abstract = "In this paper, we introduce Attributive Masking Learning (AML), a method designed for explaining language model predictions by learning input masks. AML trains an attribution model to identify influential tokens in the input for a given language model{'}s prediction. The central concept of AML is to train an auxiliary attribution model to simultaneously 1) mask as much input data as possible while ensuring that the language model{'}s prediction closely aligns with its prediction on the original input, and 2) ensure a significant change in the model{'}s prediction when applying the inverse (complement) of the same mask to the input. This dual-masking approach further enables the optimization of the explanation w.r.t. the metric of interest. We demonstrate the effectiveness of AML on both encoder-based and decoder-based language models, showcasing its superiority over a variety of state-of-the-art explanation methods on multiple benchmarks.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="barkan-etal-2024-llm">
    <titleInfo>
      <title>LLM Explainability via Attributive Masking Learning</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Oren</namePart>
      <namePart type="family">Barkan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yonatan</namePart>
      <namePart type="family">Toib</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yehonatan</namePart>
      <namePart type="family">Elisha</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jonathan</namePart>
      <namePart type="family">Weill</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Noam</namePart>
      <namePart type="family">Koenigstein</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: EMNLP 2024</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Yaser</namePart>
        <namePart type="family">Al-Onaizan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mohit</namePart>
        <namePart type="family">Bansal</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yun-Nung</namePart>
        <namePart type="family">Chen</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Miami, Florida, USA</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>In this paper, we introduce Attributive Masking Learning (AML), a method designed for explaining language model predictions by learning input masks. AML trains an attribution model to identify influential tokens in the input for a given language model’s prediction. The central concept of AML is to train an auxiliary attribution model to simultaneously 1) mask as much input data as possible while ensuring that the language model’s prediction closely aligns with its prediction on the original input, and 2) ensure a significant change in the model’s prediction when applying the inverse (complement) of the same mask to the input. This dual-masking approach further enables the optimization of the explanation w.r.t. the metric of interest. We demonstrate the effectiveness of AML on both encoder-based and decoder-based language models, showcasing its superiority over a variety of state-of-the-art explanation methods on multiple benchmarks.</abstract>
    <identifier type="citekey">barkan-etal-2024-llm</identifier>
    <location>
      <url>https://aclanthology.org/2024.findings-emnlp.556</url>
    </location>
    <part>
      <date>2024-11</date>
      <extent unit="page">
        <start>9522</start>
        <end>9537</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T LLM Explainability via Attributive Masking Learning
%A Barkan, Oren
%A Toib, Yonatan
%A Elisha, Yehonatan
%A Weill, Jonathan
%A Koenigstein, Noam
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Findings of the Association for Computational Linguistics: EMNLP 2024
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F barkan-etal-2024-llm
%X In this paper, we introduce Attributive Masking Learning (AML), a method designed for explaining language model predictions by learning input masks. AML trains an attribution model to identify influential tokens in the input for a given language model’s prediction. The central concept of AML is to train an auxiliary attribution model to simultaneously 1) mask as much input data as possible while ensuring that the language model’s prediction closely aligns with its prediction on the original input, and 2) ensure a significant change in the model’s prediction when applying the inverse (complement) of the same mask to the input. This dual-masking approach further enables the optimization of the explanation w.r.t. the metric of interest. We demonstrate the effectiveness of AML on both encoder-based and decoder-based language models, showcasing its superiority over a variety of state-of-the-art explanation methods on multiple benchmarks.
%U https://aclanthology.org/2024.findings-emnlp.556
%P 9522-9537
[LLM Explainability via Attributive Masking Learning](https://aclanthology.org/2024.findings-emnlp.556) (Barkan et al., Findings 2024)
Oren Barkan, Yonatan Toib, Yehonatan Elisha, Jonathan Weill, and Noam Koenigstein. 2024. LLM Explainability via Attributive Masking Learning. In Findings of the Association for Computational Linguistics: EMNLP 2024, pages 9522–9537, Miami, Florida, USA. Association for Computational Linguistics.
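
For readers orienting from the abstract alone, below is a minimal sketch of the dual-masking objective it describes: keep the prediction under the learned mask, change it under the complement mask, and mask as much input as possible. This is not the authors' released implementation; the function and parameter names (`aml_style_loss`, `mask_probs`, the `lambda_*` weights), the soft-masking-of-embeddings relaxation, and the assumption of a Hugging Face-style model accepting `inputs_embeds` are all illustrative.

```python
# Hypothetical sketch of the dual-masking objective from the AML abstract.
# Names, weights, and the soft-masking relaxation are assumptions, not the
# authors' code.
import torch
import torch.nn.functional as F

def aml_style_loss(lm, embeds, orig_logits, mask_probs,
                   lambda_inv=1.0, lambda_sparse=1.0):
    """Dual-masking loss over a learned per-token mask.

    embeds:      (batch, seq, hidden) input embeddings of the explained LM
    orig_logits: LM logits on the unmasked input
    mask_probs:  (batch, seq) values in [0, 1]; ~1 keeps a token, ~0 masks it
    """
    keep = mask_probs.unsqueeze(-1)                # broadcast over hidden dim
    p_orig = F.softmax(orig_logits, dim=-1)

    # 1) Masked input: prediction should align with the original prediction.
    logp_masked = F.log_softmax(lm(inputs_embeds=embeds * keep).logits, dim=-1)
    align = F.kl_div(logp_masked, p_orig, reduction="batchmean")

    # 2) Complement mask: prediction should diverge, so the KL is negated
    #    and minimized (i.e., divergence is maximized).
    logp_inv = F.log_softmax(lm(inputs_embeds=embeds * (1 - keep)).logits, dim=-1)
    diverge = -F.kl_div(logp_inv, p_orig, reduction="batchmean")

    # 3) Sparsity: a small mean mask value means most input tokens are masked.
    sparsity = mask_probs.mean()

    return align + lambda_inv * diverge + lambda_sparse * sparsity
```

In the paper, `mask_probs` would come from the auxiliary attribution model being trained, and the abstract notes the objective can further be tuned toward the evaluation metric of interest; the sketch mirrors only the two masking terms plus a sparsity pressure.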