@inproceedings{beck-etal-2020-representation,
title = "Representation Problems in Linguistic Annotations: Ambiguity, Variation, Uncertainty, Error and Bias",
author = "Beck, Christin and
Booth, Hannah and
El-Assady, Mennatallah and
Butt, Miriam",
editor = "Dipper, Stefanie and
Zeldes, Amir",
booktitle = "Proceedings of the 14th Linguistic Annotation Workshop",
month = dec,
year = "2020",
address = "Barcelona, Spain",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.law-1.6",
pages = "60--73",
abstract = "The development of linguistic corpora is fraught with various problems of annotation and representation. These constitute a very real challenge for the development and use of annotated corpora, but as yet not much literature exists on how to address the underlying problems. In this paper, we identify and discuss five sources of representation problems, which are independent though interrelated: ambiguity, variation, uncertainty, error and bias. We outline and characterize these sources, discussing how their improper treatment can have stark consequences for research outcomes. Finally, we discuss how an adequate treatment can inform corpus-related linguistic research, both computational and theoretical, improving the reliability of research results and NLP models, as well as informing the more general reproducibility issue.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="beck-etal-2020-representation">
<titleInfo>
<title>Representation Problems in Linguistic Annotations: Ambiguity, Variation, Uncertainty, Error and Bias</title>
</titleInfo>
<name type="personal">
<namePart type="given">Christin</namePart>
<namePart type="family">Beck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hannah</namePart>
<namePart type="family">Booth</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mennatallah</namePart>
<namePart type="family">El-Assady</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Miriam</namePart>
<namePart type="family">Butt</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 14th Linguistic Annotation Workshop</title>
</titleInfo>
<name type="personal">
<namePart type="given">Stefanie</namePart>
<namePart type="family">Dipper</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amir</namePart>
<namePart type="family">Zeldes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Barcelona, Spain</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The development of linguistic corpora is fraught with various problems of annotation and representation. These constitute a very real challenge for the development and use of annotated corpora, but as yet not much literature exists on how to address the underlying problems. In this paper, we identify and discuss five sources of representation problems, which are independent though interrelated: ambiguity, variation, uncertainty, error and bias. We outline and characterize these sources, discussing how their improper treatment can have stark consequences for research outcomes. Finally, we discuss how an adequate treatment can inform corpus-related linguistic research, both computational and theoretical, improving the reliability of research results and NLP models, as well as informing the more general reproducibility issue.</abstract>
<identifier type="citekey">beck-etal-2020-representation</identifier>
<location>
<url>https://aclanthology.org/2020.law-1.6</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>60</start>
<end>73</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Representation Problems in Linguistic Annotations: Ambiguity, Variation, Uncertainty, Error and Bias
%A Beck, Christin
%A Booth, Hannah
%A El-Assady, Mennatallah
%A Butt, Miriam
%Y Dipper, Stefanie
%Y Zeldes, Amir
%S Proceedings of the 14th Linguistic Annotation Workshop
%D 2020
%8 December
%I Association for Computational Linguistics
%C Barcelona, Spain
%F beck-etal-2020-representation
%X The development of linguistic corpora is fraught with various problems of annotation and representation. These constitute a very real challenge for the development and use of annotated corpora, but as yet not much literature exists on how to address the underlying problems. In this paper, we identify and discuss five sources of representation problems, which are independent though interrelated: ambiguity, variation, uncertainty, error and bias. We outline and characterize these sources, discussing how their improper treatment can have stark consequences for research outcomes. Finally, we discuss how an adequate treatment can inform corpus-related linguistic research, both computational and theoretical, improving the reliability of research results and NLP models, as well as informing the more general reproducibility issue.
%U https://aclanthology.org/2020.law-1.6
%P 60-73
Markdown (Informal)
[Representation Problems in Linguistic Annotations: Ambiguity, Variation, Uncertainty, Error and Bias](https://aclanthology.org/2020.law-1.6) (Beck et al., LAW 2020)
ACL