@inproceedings{almeman-etal-2024-wordnet,
title = "{W}ord{N}et under Scrutiny: Dictionary Examples in the Era of Large Language Models",
author = "Almeman, Fatemah Yousef and
Schockaert, Steven and
Espinosa Anke, Luis",
editor = "Calzolari, Nicoletta and
Kan, Min-Yen and
Hoste, Veronique and
Lenci, Alessandro and
Sakti, Sakriani and
Xue, Nianwen",
booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.lrec-main.1538",
pages = "17683--17695",
abstract = "Dictionary definitions play a prominent role in a wide range of NLP tasks, for instance by providing additional context about the meaning of rare and emerging terms. Many dictionaries also provide examples to illustrate the prototypical usage of words, which brings further opportunities for training or enriching NLP models. The intrinsic qualities of dictionaries, and related lexical resources such as glossaries and encyclopedias, are however still not well-understood. While there has been significant work on developing best practices, such guidance has been aimed at traditional usages of dictionaries (e.g. supporting language learners), and it is currently unclear how different quality aspects affect the NLP systems that rely on them. To address this issue, we compare WordNet, the most commonly used lexical resource in NLP, with a variety of dictionaries, as well as with examples that were generated by ChatGPT. Our analysis involves human judgments as well as automatic metrics. We furthermore study the quality of word embeddings derived from dictionary examples, as a proxy for downstream performance. We find that WordNet{'}s examples lead to lower-quality embeddings than those from the Oxford dictionary. Surprisingly, however, the ChatGPT generated examples were found to be most effective overall.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="almeman-etal-2024-wordnet">
<titleInfo>
<title>WordNet under Scrutiny: Dictionary Examples in the Era of Large Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Fatemah</namePart>
<namePart type="given">Yousef</namePart>
<namePart type="family">Almeman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Schockaert</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luis</namePart>
<namePart type="family">Espinosa Anke</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronique</namePart>
<namePart type="family">Hoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Dictionary definitions play a prominent role in a wide range of NLP tasks, for instance by providing additional context about the meaning of rare and emerging terms. Many dictionaries also provide examples to illustrate the prototypical usage of words, which brings further opportunities for training or enriching NLP models. The intrinsic qualities of dictionaries, and related lexical resources such as glossaries and encyclopedias, are however still not well-understood. While there has been significant work on developing best practices, such guidance has been aimed at traditional usages of dictionaries (e.g. supporting language learners), and it is currently unclear how different quality aspects affect the NLP systems that rely on them. To address this issue, we compare WordNet, the most commonly used lexical resource in NLP, with a variety of dictionaries, as well as with examples that were generated by ChatGPT. Our analysis involves human judgments as well as automatic metrics. We furthermore study the quality of word embeddings derived from dictionary examples, as a proxy for downstream performance. We find that WordNet’s examples lead to lower-quality embeddings than those from the Oxford dictionary. Surprisingly, however, the ChatGPT generated examples were found to be most effective overall.</abstract>
<identifier type="citekey">almeman-etal-2024-wordnet</identifier>
<location>
<url>https://aclanthology.org/2024.lrec-main.1538</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>17683</start>
<end>17695</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T WordNet under Scrutiny: Dictionary Examples in the Era of Large Language Models
%A Almeman, Fatemah Yousef
%A Schockaert, Steven
%A Espinosa Anke, Luis
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F almeman-etal-2024-wordnet
%X Dictionary definitions play a prominent role in a wide range of NLP tasks, for instance by providing additional context about the meaning of rare and emerging terms. Many dictionaries also provide examples to illustrate the prototypical usage of words, which brings further opportunities for training or enriching NLP models. The intrinsic qualities of dictionaries, and related lexical resources such as glossaries and encyclopedias, are however still not well-understood. While there has been significant work on developing best practices, such guidance has been aimed at traditional usages of dictionaries (e.g. supporting language learners), and it is currently unclear how different quality aspects affect the NLP systems that rely on them. To address this issue, we compare WordNet, the most commonly used lexical resource in NLP, with a variety of dictionaries, as well as with examples that were generated by ChatGPT. Our analysis involves human judgments as well as automatic metrics. We furthermore study the quality of word embeddings derived from dictionary examples, as a proxy for downstream performance. We find that WordNet’s examples lead to lower-quality embeddings than those from the Oxford dictionary. Surprisingly, however, the ChatGPT generated examples were found to be most effective overall.
%U https://aclanthology.org/2024.lrec-main.1538
%P 17683-17695
Markdown (Informal)
[WordNet under Scrutiny: Dictionary Examples in the Era of Large Language Models](https://aclanthology.org/2024.lrec-main.1538) (Almeman et al., LREC-COLING 2024)
ACL