@inproceedings{andresen-etal-2020-modeling,
title = "Modeling Ambiguity with Many Annotators and Self-Assessments of Annotator Certainty",
author = "Andresen, Melanie and
Vauth, Michael and
Zinsmeister, Heike",
editor = "Dipper, Stefanie and
Zeldes, Amir",
booktitle = "Proceedings of the 14th Linguistic Annotation Workshop",
month = dec,
year = "2020",
address = "Barcelona, Spain",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.law-1.5",
pages = "48--59",
abstract = "Most annotation efforts assume that annotators will agree on labels, if the annotation categories are well-defined and documented in annotation guidelines. However, this is not always true. For instance, content-related questions such as {`}Is this sentence about topic X?{'} are unlikely to elicit the same answer from all annotators. Additional specifications in the guidelines are helpful to some extent, but can soon get overspecified by rules that cannot be justified by a research question. In this study, we model the semantic category {`}illness{'} and its use in a gradual way. For this purpose, we (i) ask many annotators (30 votes per item, 960 items) for their opinion in a crowdsourcing experiment, (ii) ask annotators to indicate their certainty with respect to their annotation, and (iii) compare this across two different text types. We show that results of multiple annotations and average annotator certainty correlate, but many ambiguities can only be captured if several people contribute. The annotated data allow us to filter for sentences with high or low agreement and analyze causes of disagreement, thus getting a better understanding of people{'}s perception of illness{---}as an example of a semantic category{---}as well as of the content of our annotated texts.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="andresen-etal-2020-modeling">
    <titleInfo>
      <title>Modeling Ambiguity with Many Annotators and Self-Assessments of Annotator Certainty</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Melanie</namePart>
      <namePart type="family">Andresen</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Michael</namePart>
      <namePart type="family">Vauth</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Heike</namePart>
      <namePart type="family">Zinsmeister</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2020-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 14th Linguistic Annotation Workshop</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Stefanie</namePart>
        <namePart type="family">Dipper</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Amir</namePart>
        <namePart type="family">Zeldes</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Barcelona, Spain</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Most annotation efforts assume that annotators will agree on labels, if the annotation categories are well-defined and documented in annotation guidelines. However, this is not always true. For instance, content-related questions such as ‘Is this sentence about topic X?’ are unlikely to elicit the same answer from all annotators. Additional specifications in the guidelines are helpful to some extent, but can soon get overspecified by rules that cannot be justified by a research question. In this study, we model the semantic category ‘illness’ and its use in a gradual way. For this purpose, we (i) ask many annotators (30 votes per item, 960 items) for their opinion in a crowdsourcing experiment, (ii) ask annotators to indicate their certainty with respect to their annotation, and (iii) compare this across two different text types. We show that results of multiple annotations and average annotator certainty correlate, but many ambiguities can only be captured if several people contribute. The annotated data allow us to filter for sentences with high or low agreement and analyze causes of disagreement, thus getting a better understanding of people’s perception of illness—as an example of a semantic category—as well as of the content of our annotated texts.</abstract>
    <identifier type="citekey">andresen-etal-2020-modeling</identifier>
    <location>
      <url>https://aclanthology.org/2020.law-1.5</url>
    </location>
    <part>
      <date>2020-12</date>
      <extent unit="page">
        <start>48</start>
        <end>59</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Modeling Ambiguity with Many Annotators and Self-Assessments of Annotator Certainty
%A Andresen, Melanie
%A Vauth, Michael
%A Zinsmeister, Heike
%Y Dipper, Stefanie
%Y Zeldes, Amir
%S Proceedings of the 14th Linguistic Annotation Workshop
%D 2020
%8 December
%I Association for Computational Linguistics
%C Barcelona, Spain
%F andresen-etal-2020-modeling
%X Most annotation efforts assume that annotators will agree on labels, if the annotation categories are well-defined and documented in annotation guidelines. However, this is not always true. For instance, content-related questions such as ‘Is this sentence about topic X?’ are unlikely to elicit the same answer from all annotators. Additional specifications in the guidelines are helpful to some extent, but can soon get overspecified by rules that cannot be justified by a research question. In this study, we model the semantic category ‘illness’ and its use in a gradual way. For this purpose, we (i) ask many annotators (30 votes per item, 960 items) for their opinion in a crowdsourcing experiment, (ii) ask annotators to indicate their certainty with respect to their annotation, and (iii) compare this across two different text types. We show that results of multiple annotations and average annotator certainty correlate, but many ambiguities can only be captured if several people contribute. The annotated data allow us to filter for sentences with high or low agreement and analyze causes of disagreement, thus getting a better understanding of people’s perception of illness—as an example of a semantic category—as well as of the content of our annotated texts.
%U https://aclanthology.org/2020.law-1.5
%P 48-59
Markdown (Informal)
[Modeling Ambiguity with Many Annotators and Self-Assessments of Annotator Certainty](https://aclanthology.org/2020.law-1.5) (Andresen et al., LAW 2020)
ACL
Melanie Andresen, Michael Vauth, and Heike Zinsmeister. 2020. [Modeling Ambiguity with Many Annotators and Self-Assessments of Annotator Certainty](https://aclanthology.org/2020.law-1.5). In *Proceedings of the 14th Linguistic Annotation Workshop*, pages 48–59, Barcelona, Spain. Association for Computational Linguistics.