@inproceedings{senel-schutze-2021-wink,
title = "Does She Wink or Does She Nod? A Challenging Benchmark for Evaluating Word Understanding of Language Models",
author = {Senel, Lutfi Kerem and
Sch{\"u}tze, Hinrich},
editor = "Merlo, Paola and
Tiedemann, J{\"o}rg and
Tsarfaty, Reut",
booktitle = "Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume",
month = apr,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.eacl-main.42",
doi = "10.18653/v1/2021.eacl-main.42",
pages = "532--538",
abstract = "Recent progress in pretraining language models on large corpora has resulted in significant performance gains on many NLP tasks. These large models acquire linguistic knowledge during pretraining, which helps to improve performance on downstream tasks via fine-tuning. To assess what kind of knowledge is acquired, language models are commonly probed by querying them with {`}fill in the blank{'} style cloze questions. Existing probing datasets mainly focus on knowledge about relations between words and entities. We introduce WDLMPro (Word Definitions Language Model Probing) to evaluate word understanding directly using dictionary definitions of words. In our experiments, three popular pretrained language models struggle to match words and their definitions. This indicates that they understand many words poorly and that our new probing task is a difficult challenge that could help guide research on LMs in the future.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="senel-schutze-2021-wink">
    <titleInfo>
      <title>Does She Wink or Does She Nod? A Challenging Benchmark for Evaluating Word Understanding of Language Models</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Lutfi</namePart>
      <namePart type="given">Kerem</namePart>
      <namePart type="family">Senel</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Hinrich</namePart>
      <namePart type="family">Schütze</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2021-04</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Paola</namePart>
        <namePart type="family">Merlo</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jörg</namePart>
        <namePart type="family">Tiedemann</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Reut</namePart>
        <namePart type="family">Tsarfaty</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Online</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Recent progress in pretraining language models on large corpora has resulted in significant performance gains on many NLP tasks. These large models acquire linguistic knowledge during pretraining, which helps to improve performance on downstream tasks via fine-tuning. To assess what kind of knowledge is acquired, language models are commonly probed by querying them with ‘fill in the blank’ style cloze questions. Existing probing datasets mainly focus on knowledge about relations between words and entities. We introduce WDLMPro (Word Definitions Language Model Probing) to evaluate word understanding directly using dictionary definitions of words. In our experiments, three popular pretrained language models struggle to match words and their definitions. This indicates that they understand many words poorly and that our new probing task is a difficult challenge that could help guide research on LMs in the future.</abstract>
    <identifier type="citekey">senel-schutze-2021-wink</identifier>
    <identifier type="doi">10.18653/v1/2021.eacl-main.42</identifier>
    <location>
      <url>https://aclanthology.org/2021.eacl-main.42</url>
    </location>
    <part>
      <date>2021-04</date>
      <extent unit="page">
        <start>532</start>
        <end>538</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Does She Wink or Does She Nod? A Challenging Benchmark for Evaluating Word Understanding of Language Models
%A Senel, Lutfi Kerem
%A Schütze, Hinrich
%Y Merlo, Paola
%Y Tiedemann, Jörg
%Y Tsarfaty, Reut
%S Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume
%D 2021
%8 April
%I Association for Computational Linguistics
%C Online
%F senel-schutze-2021-wink
%X Recent progress in pretraining language models on large corpora has resulted in significant performance gains on many NLP tasks. These large models acquire linguistic knowledge during pretraining, which helps to improve performance on downstream tasks via fine-tuning. To assess what kind of knowledge is acquired, language models are commonly probed by querying them with ‘fill in the blank’ style cloze questions. Existing probing datasets mainly focus on knowledge about relations between words and entities. We introduce WDLMPro (Word Definitions Language Model Probing) to evaluate word understanding directly using dictionary definitions of words. In our experiments, three popular pretrained language models struggle to match words and their definitions. This indicates that they understand many words poorly and that our new probing task is a difficult challenge that could help guide research on LMs in the future.
%R 10.18653/v1/2021.eacl-main.42
%U https://aclanthology.org/2021.eacl-main.42
%U https://doi.org/10.18653/v1/2021.eacl-main.42
%P 532-538
Markdown (Informal)
[Does She Wink or Does She Nod? A Challenging Benchmark for Evaluating Word Understanding of Language Models](https://aclanthology.org/2021.eacl-main.42) (Senel & Schütze, EACL 2021)
ACL
Lutfi Kerem Senel and Hinrich Schütze. 2021. Does She Wink or Does She Nod? A Challenging Benchmark for Evaluating Word Understanding of Language Models. In Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume, pages 532–538, Online. Association for Computational Linguistics.
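
The abstract describes probing pretrained language models with 'fill in the blank' cloze queries and testing whether they can match words to their dictionary definitions. The following is a minimal sketch of that style of probe, not the authors' released WDLMPro code: it scores single-token candidate words against a definition with a masked language model. The model choice (bert-base-uncased via HuggingFace transformers) and the cloze template are assumptions for illustration only.

# Minimal sketch of a cloze-style word-definition probe (illustrative;
# not the WDLMPro implementation from the paper). The model name and
# the "X means <definition>" template are assumptions.
import torch
from transformers import AutoModelForMaskedLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModelForMaskedLM.from_pretrained("bert-base-uncased")
model.eval()

def score_candidates(definition, candidates):
    """Log-probability of each single-token candidate filling the
    [MASK] slot in a simple 'word means definition' template."""
    text = f"{tokenizer.mask_token} means {definition}."
    inputs = tokenizer(text, return_tensors="pt")
    # Position of the [MASK] token in the input sequence.
    mask_pos = (inputs.input_ids[0] == tokenizer.mask_token_id).nonzero().item()
    with torch.no_grad():
        logits = model(**inputs).logits[0, mask_pos]
    log_probs = torch.log_softmax(logits, dim=-1)
    scores = {}
    for word in candidates:
        ids = tokenizer(word, add_special_tokens=False).input_ids
        if len(ids) == 1:  # keep the sketch to single-token words
            scores[word] = log_probs[ids[0]].item()
    return scores

# E.g. the title's contrast: which word better fits the definition?
print(score_candidates("to close and open one eye quickly, as a signal",
                       ["wink", "nod"]))

Note that WDLMPro as described in the abstract is a matching task over words and definitions, which this single-definition scoring simplifies; ranking each candidate's fill-in score is one straightforward way to read off a model's preference.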