@inproceedings{kunz-kuhlmann-2020-classifier,
    title = "Classifier Probes May Just Learn from Linear Context Features",
    author = "Kunz, Jenny and
      Kuhlmann, Marco",
    editor = "Scott, Donia and
      Bel, Nuria and
      Zong, Chengqing",
    booktitle = "Proceedings of the 28th International Conference on Computational Linguistics",
    month = dec,
    year = "2020",
    address = "Barcelona, Spain (Online)",
    publisher = "International Committee on Computational Linguistics",
    url = "https://aclanthology.org/2020.coling-main.450",
    doi = "10.18653/v1/2020.coling-main.450",
    pages = "5136--5146",
    abstract = "Classifiers trained on auxiliary probing tasks are a popular tool to analyze the representations learned by neural sentence encoders such as BERT and ELMo. While many authors are aware of the difficulty to distinguish between {``}extracting the linguistic structure encoded in the representations{''} and {``}learning the probing task,{''} the validity of probing methods calls for further research. Using a neighboring word identity prediction task, we show that the token embeddings learned by neural sentence encoders contain a significant amount of information about the exact linear context of the token, and hypothesize that, with such information, learning standard probing tasks may be feasible even without additional linguistic structure. We develop this hypothesis into a framework in which analysis efforts can be scrutinized and argue that, with current models and baselines, conclusions that representations contain linguistic structure are not well-founded. Current probing methodology, such as restricting the classifier{'}s expressiveness or using strong baselines, can help to better estimate the complexity of learning, but not build a foundation for speculations about the nature of the linguistic structure encoded in the learned representations.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="kunz-kuhlmann-2020-classifier">
    <titleInfo>
      <title>Classifier Probes May Just Learn from Linear Context Features</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Jenny</namePart>
      <namePart type="family">Kunz</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Marco</namePart>
      <namePart type="family">Kuhlmann</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2020-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 28th International Conference on Computational Linguistics</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Donia</namePart>
        <namePart type="family">Scott</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Nuria</namePart>
        <namePart type="family">Bel</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Chengqing</namePart>
        <namePart type="family">Zong</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>International Committee on Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Barcelona, Spain (Online)</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Classifiers trained on auxiliary probing tasks are a popular tool to analyze the representations learned by neural sentence encoders such as BERT and ELMo. While many authors are aware of the difficulty to distinguish between “extracting the linguistic structure encoded in the representations” and “learning the probing task,” the validity of probing methods calls for further research. Using a neighboring word identity prediction task, we show that the token embeddings learned by neural sentence encoders contain a significant amount of information about the exact linear context of the token, and hypothesize that, with such information, learning standard probing tasks may be feasible even without additional linguistic structure. We develop this hypothesis into a framework in which analysis efforts can be scrutinized and argue that, with current models and baselines, conclusions that representations contain linguistic structure are not well-founded. Current probing methodology, such as restricting the classifier’s expressiveness or using strong baselines, can help to better estimate the complexity of learning, but not build a foundation for speculations about the nature of the linguistic structure encoded in the learned representations.</abstract>
    <identifier type="citekey">kunz-kuhlmann-2020-classifier</identifier>
    <identifier type="doi">10.18653/v1/2020.coling-main.450</identifier>
    <location>
      <url>https://aclanthology.org/2020.coling-main.450</url>
    </location>
    <part>
      <date>2020-12</date>
      <extent unit="page">
        <start>5136</start>
        <end>5146</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Classifier Probes May Just Learn from Linear Context Features
%A Kunz, Jenny
%A Kuhlmann, Marco
%Y Scott, Donia
%Y Bel, Nuria
%Y Zong, Chengqing
%S Proceedings of the 28th International Conference on Computational Linguistics
%D 2020
%8 December
%I International Committee on Computational Linguistics
%C Barcelona, Spain (Online)
%F kunz-kuhlmann-2020-classifier
%X Classifiers trained on auxiliary probing tasks are a popular tool to analyze the representations learned by neural sentence encoders such as BERT and ELMo. While many authors are aware of the difficulty to distinguish between “extracting the linguistic structure encoded in the representations” and “learning the probing task,” the validity of probing methods calls for further research. Using a neighboring word identity prediction task, we show that the token embeddings learned by neural sentence encoders contain a significant amount of information about the exact linear context of the token, and hypothesize that, with such information, learning standard probing tasks may be feasible even without additional linguistic structure. We develop this hypothesis into a framework in which analysis efforts can be scrutinized and argue that, with current models and baselines, conclusions that representations contain linguistic structure are not well-founded. Current probing methodology, such as restricting the classifier’s expressiveness or using strong baselines, can help to better estimate the complexity of learning, but not build a foundation for speculations about the nature of the linguistic structure encoded in the learned representations.
%R 10.18653/v1/2020.coling-main.450
%U https://aclanthology.org/2020.coling-main.450
%U https://doi.org/10.18653/v1/2020.coling-main.450
%P 5136-5146
Markdown (Informal)
[Classifier Probes May Just Learn from Linear Context Features](https://aclanthology.org/2020.coling-main.450) (Kunz & Kuhlmann, COLING 2020)
ACL
Jenny Kunz and Marco Kuhlmann. 2020. Classifier Probes May Just Learn from Linear Context Features. In Proceedings of the 28th International Conference on Computational Linguistics, pages 5136–5146, Barcelona, Spain (Online). International Committee on Computational Linguistics.