@inproceedings{peng-etal-2024-different,
title = "Different Tastes of Entities: Investigating Human Label Variation in Named Entity Annotations",
author = "Peng, Siyao and
Sun, Zihang and
Loftus, Sebastian and
Plank, Barbara",
editor = "Pyatkin, Valentina and
Fried, Daniel and
Stengel-Eskin, Elias and
Liu, Alisa and
Pezzelle, Sandro",
booktitle = "Proceedings of the Third Workshop on Understanding Implicit and Underspecified Language",
month = mar,
year = "2024",
address = "Malta",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.unimplicit-1.7/",
pages = "73--81",
abstract = "Named Entity Recognition (NER) is a key information extraction task with a long-standing tradition. While recent studies address and aim to correct annotation errors via re-labeling efforts, little is known about the sources of label variation, such as text ambiguity, annotation error, or guideline divergence. This is especially the case for high-quality datasets and beyond English CoNLL03. This paper studies disagreements in expert-annotated named entity datasets for three varieties: English, Danish, and Bavarian. We show that text ambiguity and artificial guideline changes are dominant factors for diverse annotations among high-quality revisions. We survey student annotations on a subset of difficult entities and substantiate the feasibility and necessity of manifold annotations for understanding named entity ambiguities from a distributional perspective."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="peng-etal-2024-different">
<titleInfo>
<title>Different Tastes of Entities: Investigating Human Label Variation in Named Entity Annotations</title>
</titleInfo>
<name type="personal">
<namePart type="given">Siyao</namePart>
<namePart type="family">Peng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zihang</namePart>
<namePart type="family">Sun</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastian</namePart>
<namePart type="family">Loftus</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Barbara</namePart>
<namePart type="family">Plank</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Third Workshop on Understanding Implicit and Underspecified Language</title>
</titleInfo>
<name type="personal">
<namePart type="given">Valentina</namePart>
<namePart type="family">Pyatkin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Fried</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elias</namePart>
<namePart type="family">Stengel-Eskin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alisa</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sandro</namePart>
<namePart type="family">Pezzelle</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Malta</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Named Entity Recognition (NER) is a key information extraction task with a long-standing tradition. While recent studies address and aim to correct annotation errors via re-labeling efforts, little is known about the sources of label variation, such as text ambiguity, annotation error, or guideline divergence. This is especially the case for high-quality datasets and beyond English CoNLL03. This paper studies disagreements in expert-annotated named entity datasets for three varieties: English, Danish, and Bavarian. We show that text ambiguity and artificial guideline changes are dominant factors for diverse annotations among high-quality revisions. We survey student annotations on a subset of difficult entities and substantiate the feasibility and necessity of manifold annotations for understanding named entity ambiguities from a distributional perspective.</abstract>
<identifier type="citekey">peng-etal-2024-different</identifier>
<location>
<url>https://aclanthology.org/2024.unimplicit-1.7/</url>
</location>
<part>
<date>2024-03</date>
<extent unit="page">
<start>73</start>
<end>81</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Different Tastes of Entities: Investigating Human Label Variation in Named Entity Annotations
%A Peng, Siyao
%A Sun, Zihang
%A Loftus, Sebastian
%A Plank, Barbara
%Y Pyatkin, Valentina
%Y Fried, Daniel
%Y Stengel-Eskin, Elias
%Y Liu, Alisa
%Y Pezzelle, Sandro
%S Proceedings of the Third Workshop on Understanding Implicit and Underspecified Language
%D 2024
%8 March
%I Association for Computational Linguistics
%C Malta
%F peng-etal-2024-different
%X Named Entity Recognition (NER) is a key information extraction task with a long-standing tradition. While recent studies address and aim to correct annotation errors via re-labeling efforts, little is known about the sources of label variation, such as text ambiguity, annotation error, or guideline divergence. This is especially the case for high-quality datasets and beyond English CoNLL03. This paper studies disagreements in expert-annotated named entity datasets for three varieties: English, Danish, and Bavarian. We show that text ambiguity and artificial guideline changes are dominant factors for diverse annotations among high-quality revisions. We survey student annotations on a subset of difficult entities and substantiate the feasibility and necessity of manifold annotations for understanding named entity ambiguities from a distributional perspective.
%U https://aclanthology.org/2024.unimplicit-1.7/
%P 73-81
Markdown (Informal)
[Different Tastes of Entities: Investigating Human Label Variation in Named Entity Annotations](https://aclanthology.org/2024.unimplicit-1.7/) (Peng et al., unimplicit 2024)
ACL