@inproceedings{eger-etal-2020-probe,
title = "How to Probe Sentence Embeddings in Low-Resource Languages: On Structural Design Choices for Probing Task Evaluation",
author = "Eger, Steffen and
Daxenberger, Johannes and
Gurevych, Iryna",
editor = "Fern{\'a}ndez, Raquel and
Linzen, Tal",
booktitle = "Proceedings of the 24th Conference on Computational Natural Language Learning",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.conll-1.8",
doi = "10.18653/v1/2020.conll-1.8",
pages = "108--118",
abstract = "Sentence encoders map sentences to real valued vectors for use in downstream applications. To peek into these representations{---}e.g., to increase interpretability of their results{---}probing tasks have been designed which query them for linguistic knowledge. However, designing probing tasks for lesser-resourced languages is tricky, because these often lack largescale annotated data or (high-quality) dependency parsers as a prerequisite of probing task design in English. To investigate how to probe sentence embeddings in such cases, we investigate sensitivity of probing task results to structural design choices, conducting the first such large scale study. We show that design choices like size of the annotated probing dataset and type of classifier used for evaluation do (sometimes substantially) influence probing outcomes. We then probe embeddings in a multilingual setup with design choices that lie in a {`}stable region{'}, as we identify for English, and find that results on English do not transfer to other languages. Fairer and more comprehensive sentence-level probing evaluation should thus be carried out on multiple languages in the future.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="eger-etal-2020-probe">
<titleInfo>
<title>How to Probe Sentence Embeddings in Low-Resource Languages: On Structural Design Choices for Probing Task Evaluation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Steffen</namePart>
<namePart type="family">Eger</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Johannes</namePart>
<namePart type="family">Daxenberger</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Iryna</namePart>
<namePart type="family">Gurevych</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 24th Conference on Computational Natural Language Learning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Raquel</namePart>
<namePart type="family">Fernández</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tal</namePart>
<namePart type="family">Linzen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Sentence encoders map sentences to real-valued vectors for use in downstream applications. To peek into these representations—e.g., to increase interpretability of their results—probing tasks have been designed which query them for linguistic knowledge. However, designing probing tasks for lesser-resourced languages is tricky, because these often lack large-scale annotated data or (high-quality) dependency parsers as a prerequisite of probing task design in English. To investigate how to probe sentence embeddings in such cases, we examine the sensitivity of probing task results to structural design choices, conducting the first such large-scale study. We show that design choices like the size of the annotated probing dataset and the type of classifier used for evaluation do (sometimes substantially) influence probing outcomes. We then probe embeddings in a multilingual setup with design choices that lie in a ‘stable region’, as we identify for English, and find that results on English do not transfer to other languages. Fairer and more comprehensive sentence-level probing evaluation should thus be carried out on multiple languages in the future.</abstract>
<identifier type="citekey">eger-etal-2020-probe</identifier>
<identifier type="doi">10.18653/v1/2020.conll-1.8</identifier>
<location>
<url>https://aclanthology.org/2020.conll-1.8</url>
</location>
<part>
<date>2020-11</date>
<extent unit="page">
<start>108</start>
<end>118</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T How to Probe Sentence Embeddings in Low-Resource Languages: On Structural Design Choices for Probing Task Evaluation
%A Eger, Steffen
%A Daxenberger, Johannes
%A Gurevych, Iryna
%Y Fernández, Raquel
%Y Linzen, Tal
%S Proceedings of the 24th Conference on Computational Natural Language Learning
%D 2020
%8 November
%I Association for Computational Linguistics
%C Online
%F eger-etal-2020-probe
%X Sentence encoders map sentences to real-valued vectors for use in downstream applications. To peek into these representations—e.g., to increase interpretability of their results—probing tasks have been designed which query them for linguistic knowledge. However, designing probing tasks for lesser-resourced languages is tricky, because these often lack large-scale annotated data or (high-quality) dependency parsers as a prerequisite of probing task design in English. To investigate how to probe sentence embeddings in such cases, we examine the sensitivity of probing task results to structural design choices, conducting the first such large-scale study. We show that design choices like the size of the annotated probing dataset and the type of classifier used for evaluation do (sometimes substantially) influence probing outcomes. We then probe embeddings in a multilingual setup with design choices that lie in a ‘stable region’, as we identify for English, and find that results on English do not transfer to other languages. Fairer and more comprehensive sentence-level probing evaluation should thus be carried out on multiple languages in the future.
%R 10.18653/v1/2020.conll-1.8
%U https://aclanthology.org/2020.conll-1.8
%U https://doi.org/10.18653/v1/2020.conll-1.8
%P 108-118
Markdown (Informal)
[How to Probe Sentence Embeddings in Low-Resource Languages: On Structural Design Choices for Probing Task Evaluation](https://aclanthology.org/2020.conll-1.8) (Eger et al., CoNLL 2020)
ACL
Steffen Eger, Johannes Daxenberger, and Iryna Gurevych. 2020. How to Probe Sentence Embeddings in Low-Resource Languages: On Structural Design Choices for Probing Task Evaluation. In Proceedings of the 24th Conference on Computational Natural Language Learning, pages 108–118, Online. Association for Computational Linguistics.