% Conference paper published in Findings of the Association for Computational Linguistics: EMNLP 2021.
@inproceedings{shapiro-etal-2021-multilabel-approach,
  author    = {Shapiro, Naomi and Paullada, Amandalynne and Steinert-Threlkeld, Shane},
  title     = {A multilabel approach to morphosyntactic probing},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2021},
  year      = {2021},
  month     = nov,
  address   = {Punta Cana, Dominican Republic},
  publisher = {Association for Computational Linguistics},
  pages     = {4486--4524},
  doi       = {10.18653/v1/2021.findings-emnlp.382},
  url       = {https://aclanthology.org/2021.findings-emnlp.382},
  abstract  = {We propose using a multilabel probing task to assess the morphosyntactic representations of multilingual word embeddings. This tweak on canonical probing makes it easy to explore morphosyntactic representations, both holistically and at the level of individual features (e.g., gender, number, case), and leads more naturally to the study of how language models handle co-occurring features (e.g., agreement phenomena). We demonstrate this task with multilingual BERT (Devlin et al., 2018), training probes for seven typologically diverse languages: Afrikaans, Croatian, Finnish, Hebrew, Korean, Spanish, and Turkish. Through this simple but robust paradigm, we verify that multilingual BERT renders many morphosyntactic features simultaneously extractable. We further evaluate the probes on six held-out languages: Arabic, Chinese, Marathi, Slovenian, Tagalog, and Yoruba. This zero-shot style of probing has the added benefit of revealing which cross-linguistic properties a language model recognizes as being shared by multiple languages.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record for one conference paper. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="shapiro-etal-2021-multilabel-approach">
    <!-- Title of the paper itself. -->
    <titleInfo>
      <title>A multilabel approach to morphosyntactic probing</title>
    </titleInfo>
    <!-- Authors, one name element each, in document order. -->
    <name type="personal">
      <namePart type="given">Naomi</namePart>
      <namePart type="family">Shapiro</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Amandalynne</namePart>
      <namePart type="family">Paullada</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Shane</namePart>
      <namePart type="family">Steinert-Threlkeld</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <!-- Issue date of the paper (year and month). -->
    <originInfo>
      <dateIssued>2021-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume in which the paper appears. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: EMNLP 2021</title>
      </titleInfo>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Punta Cana, Dominican Republic</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>We propose using a multilabel probing task to assess the morphosyntactic representations of multilingual word embeddings. This tweak on canonical probing makes it easy to explore morphosyntactic representations, both holistically and at the level of individual features (e.g., gender, number, case), and leads more naturally to the study of how language models handle co-occurring features (e.g., agreement phenomena). We demonstrate this task with multilingual BERT (Devlin et al., 2018), training probes for seven typologically diverse languages: Afrikaans, Croatian, Finnish, Hebrew, Korean, Spanish, and Turkish. Through this simple but robust paradigm, we verify that multilingual BERT renders many morphosyntactic features simultaneously extractable. We further evaluate the probes on six held-out languages: Arabic, Chinese, Marathi, Slovenian, Tagalog, and Yoruba. This zero-shot style of probing has the added benefit of revealing which cross-linguistic properties a language model recognizes as being shared by multiple languages.</abstract>
    <!-- Identifiers: citation key, DOI, and landing-page URL. -->
    <identifier type="citekey">shapiro-etal-2021-multilabel-approach</identifier>
    <identifier type="doi">10.18653/v1/2021.findings-emnlp.382</identifier>
    <location>
      <url>https://aclanthology.org/2021.findings-emnlp.382</url>
    </location>
    <!-- Date and page extent (start and end page) of this part. -->
    <part>
      <date>2021-11</date>
      <extent unit="page">
        <start>4486</start>
        <end>4524</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T A multilabel approach to morphosyntactic probing
%A Shapiro, Naomi
%A Paullada, Amandalynne
%A Steinert-Threlkeld, Shane
%S Findings of the Association for Computational Linguistics: EMNLP 2021
%D 2021
%8 November
%I Association for Computational Linguistics
%C Punta Cana, Dominican Republic
%F shapiro-etal-2021-multilabel-approach
%X We propose using a multilabel probing task to assess the morphosyntactic representations of multilingual word embeddings. This tweak on canonical probing makes it easy to explore morphosyntactic representations, both holistically and at the level of individual features (e.g., gender, number, case), and leads more naturally to the study of how language models handle co-occurring features (e.g., agreement phenomena). We demonstrate this task with multilingual BERT (Devlin et al., 2018), training probes for seven typologically diverse languages: Afrikaans, Croatian, Finnish, Hebrew, Korean, Spanish, and Turkish. Through this simple but robust paradigm, we verify that multilingual BERT renders many morphosyntactic features simultaneously extractable. We further evaluate the probes on six held-out languages: Arabic, Chinese, Marathi, Slovenian, Tagalog, and Yoruba. This zero-shot style of probing has the added benefit of revealing which cross-linguistic properties a language model recognizes as being shared by multiple languages.
%R 10.18653/v1/2021.findings-emnlp.382
%U https://aclanthology.org/2021.findings-emnlp.382
%U https://doi.org/10.18653/v1/2021.findings-emnlp.382
%P 4486-4524
Markdown (Informal)
[A multilabel approach to morphosyntactic probing](https://aclanthology.org/2021.findings-emnlp.382) (Shapiro et al., Findings 2021)
ACL
- Naomi Shapiro, Amandalynne Paullada, and Shane Steinert-Threlkeld. 2021. A multilabel approach to morphosyntactic probing. In Findings of the Association for Computational Linguistics: EMNLP 2021, pages 4486–4524, Punta Cana, Dominican Republic. Association for Computational Linguistics.