@inproceedings{eger-etal-2019-pitfalls,
title = "Pitfalls in the Evaluation of Sentence Embeddings",
author = {Eger, Steffen and
R{\"u}ckl{\'e}, Andreas and
Gurevych, Iryna},
editor = "Augenstein, Isabelle and
Gella, Spandana and
Ruder, Sebastian and
Kann, Katharina and
Can, Burcu and
Welbl, Johannes and
Conneau, Alexis and
Ren, Xiang and
Rei, Marek",
booktitle = "Proceedings of the 4th Workshop on Representation Learning for NLP (RepL4NLP-2019)",
month = aug,
year = "2019",
address = "Florence, Italy",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W19-4308",
doi = "10.18653/v1/W19-4308",
pages = "55--60",
abstract = "Deep learning models continuously break new records across different NLP tasks. At the same time, their success exposes weaknesses of model evaluation. Here, we compile several key pitfalls of evaluation of sentence embeddings, a currently very popular NLP paradigm. These pitfalls include the comparison of embeddings of different sizes, normalization of embeddings, and the low (and diverging) correlations between transfer and probing tasks. Our motivation is to challenge the current evaluation of sentence embeddings and to provide an easy-to-access reference for future research. Based on our insights, we also recommend better practices for better future evaluations of sentence embeddings.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="eger-etal-2019-pitfalls">
<titleInfo>
<title>Pitfalls in the Evaluation of Sentence Embeddings</title>
</titleInfo>
<name type="personal">
<namePart type="given">Steffen</namePart>
<namePart type="family">Eger</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andreas</namePart>
<namePart type="family">Rücklé</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Iryna</namePart>
<namePart type="family">Gurevych</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 4th Workshop on Representation Learning for NLP (RepL4NLP-2019)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Isabelle</namePart>
<namePart type="family">Augenstein</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Spandana</namePart>
<namePart type="family">Gella</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastian</namePart>
<namePart type="family">Ruder</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Katharina</namePart>
<namePart type="family">Kann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Burcu</namePart>
<namePart type="family">Can</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Johannes</namePart>
<namePart type="family">Welbl</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexis</namePart>
<namePart type="family">Conneau</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiang</namePart>
<namePart type="family">Ren</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marek</namePart>
<namePart type="family">Rei</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Florence, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Deep learning models continuously break new records across different NLP tasks. At the same time, their success exposes weaknesses of model evaluation. Here, we compile several key pitfalls of evaluation of sentence embeddings, a currently very popular NLP paradigm. These pitfalls include the comparison of embeddings of different sizes, normalization of embeddings, and the low (and diverging) correlations between transfer and probing tasks. Our motivation is to challenge the current evaluation of sentence embeddings and to provide an easy-to-access reference for future research. Based on our insights, we also recommend better practices for better future evaluations of sentence embeddings.</abstract>
<identifier type="citekey">eger-etal-2019-pitfalls</identifier>
<identifier type="doi">10.18653/v1/W19-4308</identifier>
<location>
<url>https://aclanthology.org/W19-4308</url>
</location>
<part>
<date>2019-08</date>
<extent unit="page">
<start>55</start>
<end>60</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Pitfalls in the Evaluation of Sentence Embeddings
%A Eger, Steffen
%A Rücklé, Andreas
%A Gurevych, Iryna
%Y Augenstein, Isabelle
%Y Gella, Spandana
%Y Ruder, Sebastian
%Y Kann, Katharina
%Y Can, Burcu
%Y Welbl, Johannes
%Y Conneau, Alexis
%Y Ren, Xiang
%Y Rei, Marek
%S Proceedings of the 4th Workshop on Representation Learning for NLP (RepL4NLP-2019)
%D 2019
%8 August
%I Association for Computational Linguistics
%C Florence, Italy
%F eger-etal-2019-pitfalls
%X Deep learning models continuously break new records across different NLP tasks. At the same time, their success exposes weaknesses of model evaluation. Here, we compile several key pitfalls of evaluation of sentence embeddings, a currently very popular NLP paradigm. These pitfalls include the comparison of embeddings of different sizes, normalization of embeddings, and the low (and diverging) correlations between transfer and probing tasks. Our motivation is to challenge the current evaluation of sentence embeddings and to provide an easy-to-access reference for future research. Based on our insights, we also recommend better practices for better future evaluations of sentence embeddings.
%R 10.18653/v1/W19-4308
%U https://aclanthology.org/W19-4308
%U https://doi.org/10.18653/v1/W19-4308
%P 55-60
Markdown (Informal)
[Pitfalls in the Evaluation of Sentence Embeddings](https://aclanthology.org/W19-4308) (Eger et al., RepL4NLP 2019)
ACL
- Steffen Eger, Andreas Rücklé, and Iryna Gurevych. 2019. Pitfalls in the Evaluation of Sentence Embeddings. In Proceedings of the 4th Workshop on Representation Learning for NLP (RepL4NLP-2019), pages 55–60, Florence, Italy. Association for Computational Linguistics.