@inproceedings{serikov-etal-2022-universal,
title = "Universal and Independent: Multilingual Probing Framework for Exhaustive Model Interpretation and Evaluation",
author = "Serikov, Oleg and
Protasov, Vitaly and
Voloshina, Ekaterina and
Knyazkova, Viktoria and
Shavrina, Tatiana",
editor = "Bastings, Jasmijn and
Belinkov, Yonatan and
Elazar, Yanai and
Hupkes, Dieuwke and
Saphra, Naomi and
Wiegreffe, Sarah",
booktitle = "Proceedings of the Fifth BlackboxNLP Workshop on Analyzing and Interpreting Neural Networks for NLP",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates (Hybrid)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.blackboxnlp-1.37",
doi = "10.18653/v1/2022.blackboxnlp-1.37",
pages = "441--456",
abstract = "Linguistic analysis of language models is one of the ways to explain and describe their reasoning, weaknesses, and limitations. In the probing part of the model interpretability research, studies concern individual languages as well as individual linguistic structures. The question arises: are the detected regularities linguistically coherent, or on the contrary, do they dissonate at the typological scale? Moreover, the majority of studies address the inherent set of languages and linguistic structures, leaving the actual typological diversity knowledge out of scope. In this paper, we present and apply the GUI-assisted framework allowing us to easily probe massive amounts of languages for all the morphosyntactic features present in the Universal Dependencies data. We show that reflecting the anglo-centric trend in NLP over the past years, most of the regularities revealed in the mBERT model are typical for the western-European languages. Our framework can be integrated with the existing probing toolboxes, model cards, and leaderboards, allowing practitioners to use and share their familiar probing methods to interpret multilingual models. Thus we propose a toolkit to systematize the multilingual flaws in multilingual models, providing a reproducible experimental setup for 104 languages and 80 morphosyntactic features.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="serikov-etal-2022-universal">
<titleInfo>
<title>Universal and Independent: Multilingual Probing Framework for Exhaustive Model Interpretation and Evaluation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Oleg</namePart>
<namePart type="family">Serikov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vitaly</namePart>
<namePart type="family">Protasov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Voloshina</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Viktoria</namePart>
<namePart type="family">Knyazkova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tatiana</namePart>
<namePart type="family">Shavrina</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth BlackboxNLP Workshop on Analyzing and Interpreting Neural Networks for NLP</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jasmijn</namePart>
<namePart type="family">Bastings</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yonatan</namePart>
<namePart type="family">Belinkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yanai</namePart>
<namePart type="family">Elazar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dieuwke</namePart>
<namePart type="family">Hupkes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Naomi</namePart>
<namePart type="family">Saphra</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sarah</namePart>
<namePart type="family">Wiegreffe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, United Arab Emirates (Hybrid)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Linguistic analysis of language models is one of the ways to explain and describe their reasoning, weaknesses, and limitations. In the probing part of the model interpretability research, studies concern individual languages as well as individual linguistic structures. The question arises: are the detected regularities linguistically coherent, or on the contrary, do they dissonate at the typological scale? Moreover, the majority of studies address the inherent set of languages and linguistic structures, leaving the actual typological diversity knowledge out of scope. In this paper, we present and apply the GUI-assisted framework allowing us to easily probe massive amounts of languages for all the morphosyntactic features present in the Universal Dependencies data. We show that reflecting the anglo-centric trend in NLP over the past years, most of the regularities revealed in the mBERT model are typical for the western-European languages. Our framework can be integrated with the existing probing toolboxes, model cards, and leaderboards, allowing practitioners to use and share their familiar probing methods to interpret multilingual models. Thus we propose a toolkit to systematize the multilingual flaws in multilingual models, providing a reproducible experimental setup for 104 languages and 80 morphosyntactic features.</abstract>
<identifier type="citekey">serikov-etal-2022-universal</identifier>
<identifier type="doi">10.18653/v1/2022.blackboxnlp-1.37</identifier>
<location>
<url>https://aclanthology.org/2022.blackboxnlp-1.37</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>441</start>
<end>456</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Universal and Independent: Multilingual Probing Framework for Exhaustive Model Interpretation and Evaluation
%A Serikov, Oleg
%A Protasov, Vitaly
%A Voloshina, Ekaterina
%A Knyazkova, Viktoria
%A Shavrina, Tatiana
%Y Bastings, Jasmijn
%Y Belinkov, Yonatan
%Y Elazar, Yanai
%Y Hupkes, Dieuwke
%Y Saphra, Naomi
%Y Wiegreffe, Sarah
%S Proceedings of the Fifth BlackboxNLP Workshop on Analyzing and Interpreting Neural Networks for NLP
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates (Hybrid)
%F serikov-etal-2022-universal
%X Linguistic analysis of language models is one of the ways to explain and describe their reasoning, weaknesses, and limitations. In the probing part of the model interpretability research, studies concern individual languages as well as individual linguistic structures. The question arises: are the detected regularities linguistically coherent, or on the contrary, do they dissonate at the typological scale? Moreover, the majority of studies address the inherent set of languages and linguistic structures, leaving the actual typological diversity knowledge out of scope. In this paper, we present and apply the GUI-assisted framework allowing us to easily probe massive amounts of languages for all the morphosyntactic features present in the Universal Dependencies data. We show that reflecting the anglo-centric trend in NLP over the past years, most of the regularities revealed in the mBERT model are typical for the western-European languages. Our framework can be integrated with the existing probing toolboxes, model cards, and leaderboards, allowing practitioners to use and share their familiar probing methods to interpret multilingual models. Thus we propose a toolkit to systematize the multilingual flaws in multilingual models, providing a reproducible experimental setup for 104 languages and 80 morphosyntactic features.
%R 10.18653/v1/2022.blackboxnlp-1.37
%U https://aclanthology.org/2022.blackboxnlp-1.37
%U https://doi.org/10.18653/v1/2022.blackboxnlp-1.37
%P 441-456
Markdown (Informal)
[Universal and Independent: Multilingual Probing Framework for Exhaustive Model Interpretation and Evaluation](https://aclanthology.org/2022.blackboxnlp-1.37) (Serikov et al., BlackboxNLP 2022)
ACL