@inproceedings{amirzadeh-etal-2024-language,
title = "How Language Models Prioritize Contextual Grammatical Cues?",
author = "Amirzadeh, Hamidreza and
Alishahi, Afra and
Mohebbi, Hosein",
editor = "Belinkov, Yonatan and
Kim, Najoung and
Jumelet, Jaap and
Mohebbi, Hosein and
Mueller, Aaron and
Chen, Hanjie",
booktitle = "Proceedings of the 7th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP",
month = nov,
year = "2024",
address = "Miami, Florida, US",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.blackboxnlp-1.21",
pages = "315--336",
abstract = "Transformer-based language models have shown an excellent ability to effectively capture and utilize contextual information. Although various analysis techniques have been used to quantify and trace the contribution of single contextual cues to a target task such as subject-verb agreement or coreference resolution, scenarios in which multiple relevant cues are available in the context remain underexplored. In this paper, we investigate how language models handle gender agreement when multiple gender cue words are present, each capable of independently disambiguating a target gender pronoun. We analyze two widely used Transformer-based models: BERT, an encoder-based, and GPT-2, a decoder-based model. Our analysis employs two complementary approaches: context mixing analysis, which tracks information flow within the model, and a variant of activation patching, which measures the impact of cues on the model{'}s prediction. We find that BERT tends to prioritize the first cue in the context to form both the target word representations and the model{'}s prediction, while GPT-2 relies more on the final cue. Our findings reveal striking differences in how encoder-based and decoder-based models prioritize and use contextual information for their predictions.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="amirzadeh-etal-2024-language">
<titleInfo>
<title>How Language Models Prioritize Contextual Grammatical Cues?</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hamidreza</namePart>
<namePart type="family">Amirzadeh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Afra</namePart>
<namePart type="family">Alishahi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hosein</namePart>
<namePart type="family">Mohebbi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 7th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yonatan</namePart>
<namePart type="family">Belinkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Najoung</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jaap</namePart>
<namePart type="family">Jumelet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hosein</namePart>
<namePart type="family">Mohebbi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aaron</namePart>
<namePart type="family">Mueller</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hanjie</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, US</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Transformer-based language models have shown an excellent ability to effectively capture and utilize contextual information. Although various analysis techniques have been used to quantify and trace the contribution of single contextual cues to a target task such as subject-verb agreement or coreference resolution, scenarios in which multiple relevant cues are available in the context remain underexplored. In this paper, we investigate how language models handle gender agreement when multiple gender cue words are present, each capable of independently disambiguating a target gender pronoun. We analyze two widely used Transformer-based models: BERT, an encoder-based, and GPT-2, a decoder-based model. Our analysis employs two complementary approaches: context mixing analysis, which tracks information flow within the model, and a variant of activation patching, which measures the impact of cues on the model’s prediction. We find that BERT tends to prioritize the first cue in the context to form both the target word representations and the model’s prediction, while GPT-2 relies more on the final cue. Our findings reveal striking differences in how encoder-based and decoder-based models prioritize and use contextual information for their predictions.</abstract>
<identifier type="citekey">amirzadeh-etal-2024-language</identifier>
<location>
<url>https://aclanthology.org/2024.blackboxnlp-1.21</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>315</start>
<end>336</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T How Language Models Prioritize Contextual Grammatical Cues?
%A Amirzadeh, Hamidreza
%A Alishahi, Afra
%A Mohebbi, Hosein
%Y Belinkov, Yonatan
%Y Kim, Najoung
%Y Jumelet, Jaap
%Y Mohebbi, Hosein
%Y Mueller, Aaron
%Y Chen, Hanjie
%S Proceedings of the 7th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, US
%F amirzadeh-etal-2024-language
%X Transformer-based language models have shown an excellent ability to effectively capture and utilize contextual information. Although various analysis techniques have been used to quantify and trace the contribution of single contextual cues to a target task such as subject-verb agreement or coreference resolution, scenarios in which multiple relevant cues are available in the context remain underexplored. In this paper, we investigate how language models handle gender agreement when multiple gender cue words are present, each capable of independently disambiguating a target gender pronoun. We analyze two widely used Transformer-based models: BERT, an encoder-based, and GPT-2, a decoder-based model. Our analysis employs two complementary approaches: context mixing analysis, which tracks information flow within the model, and a variant of activation patching, which measures the impact of cues on the model’s prediction. We find that BERT tends to prioritize the first cue in the context to form both the target word representations and the model’s prediction, while GPT-2 relies more on the final cue. Our findings reveal striking differences in how encoder-based and decoder-based models prioritize and use contextual information for their predictions.
%U https://aclanthology.org/2024.blackboxnlp-1.21
%P 315-336
Markdown (Informal)
[How Language Models Prioritize Contextual Grammatical Cues?](https://aclanthology.org/2024.blackboxnlp-1.21) (Amirzadeh et al., BlackboxNLP 2024)
ACL
Hamidreza Amirzadeh, Afra Alishahi, and Hosein Mohebbi. 2024. How Language Models Prioritize Contextual Grammatical Cues? In Proceedings of the 7th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP, pages 315–336, Miami, Florida, US. Association for Computational Linguistics.