% ACL Anthology record: BlackboxNLP 2022 workshop paper (DistNLI / causal mediation analysis).
% Values brace-delimited (braces nest; quotes do not) and aligned; field values unchanged.
@inproceedings{ban-etal-2022-testing,
  title     = {Testing Pre-trained Language Models{'} Understanding of Distributivity via Causal Mediation Analysis},
  author    = {Ban, Pangbo and
               Jiang, Yifan and
               Liu, Tianran and
               Steinert-Threlkeld, Shane},
  editor    = {Bastings, Jasmijn and
               Belinkov, Yonatan and
               Elazar, Yanai and
               Hupkes, Dieuwke and
               Saphra, Naomi and
               Wiegreffe, Sarah},
  booktitle = {Proceedings of the Fifth BlackboxNLP Workshop on Analyzing and Interpreting Neural Networks for NLP},
  month     = dec,
  year      = {2022},
  address   = {Abu Dhabi, United Arab Emirates (Hybrid)},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2022.blackboxnlp-1.26},
  doi       = {10.18653/v1/2022.blackboxnlp-1.26},
  pages     = {314--324},
  abstract  = {To what extent do pre-trained language models grasp semantic knowledge regarding the phenomenon of distributivity? In this paper, we introduce DistNLI, a new diagnostic dataset for natural language inference that targets the semantic difference arising from distributivity, and employ the causal mediation analysis framework to quantify the model behavior and explore the underlying mechanism in this semantically-related task. We find that the extent of models{'} understanding is associated with model size and vocabulary size. We also provide insights into how models encode such high-level semantic knowledge.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ban-etal-2022-testing">
<titleInfo>
<title>Testing Pre-trained Language Models’ Understanding of Distributivity via Causal Mediation Analysis</title>
</titleInfo>
<name type="personal">
<namePart type="given">Pangbo</namePart>
<namePart type="family">Ban</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yifan</namePart>
<namePart type="family">Jiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tianran</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shane</namePart>
<namePart type="family">Steinert-Threlkeld</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth BlackboxNLP Workshop on Analyzing and Interpreting Neural Networks for NLP</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jasmijn</namePart>
<namePart type="family">Bastings</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yonatan</namePart>
<namePart type="family">Belinkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yanai</namePart>
<namePart type="family">Elazar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dieuwke</namePart>
<namePart type="family">Hupkes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Naomi</namePart>
<namePart type="family">Saphra</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sarah</namePart>
<namePart type="family">Wiegreffe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, United Arab Emirates (Hybrid)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>To what extent do pre-trained language models grasp semantic knowledge regarding the phenomenon of distributivity? In this paper, we introduce DistNLI, a new diagnostic dataset for natural language inference that targets the semantic difference arising from distributivity, and employ the causal mediation analysis framework to quantify the model behavior and explore the underlying mechanism in this semantically-related task. We find that the extent of models’ understanding is associated with model size and vocabulary size. We also provide insights into how models encode such high-level semantic knowledge.</abstract>
<identifier type="citekey">ban-etal-2022-testing</identifier>
<identifier type="doi">10.18653/v1/2022.blackboxnlp-1.26</identifier>
<location>
<url>https://aclanthology.org/2022.blackboxnlp-1.26</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>314</start>
<end>324</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Testing Pre-trained Language Models’ Understanding of Distributivity via Causal Mediation Analysis
%A Ban, Pangbo
%A Jiang, Yifan
%A Liu, Tianran
%A Steinert-Threlkeld, Shane
%Y Bastings, Jasmijn
%Y Belinkov, Yonatan
%Y Elazar, Yanai
%Y Hupkes, Dieuwke
%Y Saphra, Naomi
%Y Wiegreffe, Sarah
%S Proceedings of the Fifth BlackboxNLP Workshop on Analyzing and Interpreting Neural Networks for NLP
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates (Hybrid)
%F ban-etal-2022-testing
%X To what extent do pre-trained language models grasp semantic knowledge regarding the phenomenon of distributivity? In this paper, we introduce DistNLI, a new diagnostic dataset for natural language inference that targets the semantic difference arising from distributivity, and employ the causal mediation analysis framework to quantify the model behavior and explore the underlying mechanism in this semantically-related task. We find that the extent of models’ understanding is associated with model size and vocabulary size. We also provide insights into how models encode such high-level semantic knowledge.
%R 10.18653/v1/2022.blackboxnlp-1.26
%U https://aclanthology.org/2022.blackboxnlp-1.26
%U https://doi.org/10.18653/v1/2022.blackboxnlp-1.26
%P 314-324
Markdown (Informal)
[Testing Pre-trained Language Models’ Understanding of Distributivity via Causal Mediation Analysis](https://aclanthology.org/2022.blackboxnlp-1.26) (Ban et al., BlackboxNLP 2022)
ACL