@article{pyatkin-etal-2023-design,
title = "Design Choices for Crowdsourcing Implicit Discourse Relations: Revealing the Biases Introduced by Task Design",
author = "Pyatkin, Valentina and
Yung, Frances and
Scholman, Merel C. J. and
Tsarfaty, Reut and
Dagan, Ido and
Demberg, Vera",
journal = "Transactions of the Association for Computational Linguistics",
volume = "11",
year = "2023",
address = "Cambridge, MA",
publisher = "MIT Press",
url = "https://aclanthology.org/2023.tacl-1.57",
doi = "10.1162/tacl_a_00586",
pages = "1014--1032",
abstract = "Disagreement in natural language annotation has mostly been studied from a perspective of biases introduced by the annotators and the annotation frameworks. Here, we propose to analyze another source of bias{---}task design bias, which has a particularly strong impact on crowdsourced linguistic annotations where natural language is used to elicit the interpretation of lay annotators. For this purpose we look at implicit discourse relation annotation, a task that has repeatedly been shown to be difficult due to the relations{'} ambiguity. We compare the annotations of 1,200 discourse relations obtained using two distinct annotation tasks and quantify the biases of both methods across four different domains. Both methods are natural language annotation tasks designed for crowdsourcing. We show that the task design can push annotators towards certain relations and that some discourse relation senses can be better elicited with one or the other annotation approach. We also conclude that this type of bias should be taken into account when training and testing models.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="pyatkin-etal-2023-design">
    <titleInfo>
      <title>Design Choices for Crowdsourcing Implicit Discourse Relations: Revealing the Biases Introduced by Task Design</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Valentina</namePart>
      <namePart type="family">Pyatkin</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Frances</namePart>
      <namePart type="family">Yung</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Merel</namePart>
      <namePart type="given">C</namePart>
      <namePart type="given">J</namePart>
      <namePart type="family">Scholman</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Reut</namePart>
      <namePart type="family">Tsarfaty</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ido</namePart>
      <namePart type="family">Dagan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Vera</namePart>
      <namePart type="family">Demberg</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2023</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <genre authority="bibutilsgt">journal article</genre>
    <relatedItem type="host">
      <titleInfo>
        <title>Transactions of the Association for Computational Linguistics</title>
      </titleInfo>
      <originInfo>
        <issuance>continuing</issuance>
        <publisher>MIT Press</publisher>
        <place>
          <placeTerm type="text">Cambridge, MA</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">periodical</genre>
      <genre authority="bibutilsgt">academic journal</genre>
    </relatedItem>
    <abstract>Disagreement in natural language annotation has mostly been studied from a perspective of biases introduced by the annotators and the annotation frameworks. Here, we propose to analyze another source of bias—task design bias, which has a particularly strong impact on crowdsourced linguistic annotations where natural language is used to elicit the interpretation of lay annotators. For this purpose we look at implicit discourse relation annotation, a task that has repeatedly been shown to be difficult due to the relations’ ambiguity. We compare the annotations of 1,200 discourse relations obtained using two distinct annotation tasks and quantify the biases of both methods across four different domains. Both methods are natural language annotation tasks designed for crowdsourcing. We show that the task design can push annotators towards certain relations and that some discourse relation senses can be better elicited with one or the other annotation approach. We also conclude that this type of bias should be taken into account when training and testing models.</abstract>
    <identifier type="citekey">pyatkin-etal-2023-design</identifier>
    <identifier type="doi">10.1162/tacl_a_00586</identifier>
    <location>
      <url>https://aclanthology.org/2023.tacl-1.57</url>
    </location>
    <part>
      <date>2023</date>
      <detail type="volume"><number>11</number></detail>
      <extent unit="page">
        <start>1014</start>
        <end>1032</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Journal Article
%T Design Choices for Crowdsourcing Implicit Discourse Relations: Revealing the Biases Introduced by Task Design
%A Pyatkin, Valentina
%A Yung, Frances
%A Scholman, Merel C. J.
%A Tsarfaty, Reut
%A Dagan, Ido
%A Demberg, Vera
%J Transactions of the Association for Computational Linguistics
%D 2023
%V 11
%I MIT Press
%C Cambridge, MA
%F pyatkin-etal-2023-design
%X Disagreement in natural language annotation has mostly been studied from a perspective of biases introduced by the annotators and the annotation frameworks. Here, we propose to analyze another source of bias—task design bias, which has a particularly strong impact on crowdsourced linguistic annotations where natural language is used to elicit the interpretation of lay annotators. For this purpose we look at implicit discourse relation annotation, a task that has repeatedly been shown to be difficult due to the relations’ ambiguity. We compare the annotations of 1,200 discourse relations obtained using two distinct annotation tasks and quantify the biases of both methods across four different domains. Both methods are natural language annotation tasks designed for crowdsourcing. We show that the task design can push annotators towards certain relations and that some discourse relation senses can be better elicited with one or the other annotation approach. We also conclude that this type of bias should be taken into account when training and testing models.
%R 10.1162/tacl_a_00586
%U https://aclanthology.org/2023.tacl-1.57
%U https://doi.org/10.1162/tacl_a_00586
%P 1014-1032
Markdown (Informal)
[Design Choices for Crowdsourcing Implicit Discourse Relations: Revealing the Biases Introduced by Task Design](https://aclanthology.org/2023.tacl-1.57) (Pyatkin et al., TACL 2023)
ACL
Valentina Pyatkin, Frances Yung, Merel C. J. Scholman, Reut Tsarfaty, Ido Dagan, and Vera Demberg. 2023. Design Choices for Crowdsourcing Implicit Discourse Relations: Revealing the Biases Introduced by Task Design. Transactions of the Association for Computational Linguistics, 11:1014–1032.