BibTeX
@inproceedings{suhr-etal-2021-crowdsourcing,
    title = "Crowdsourcing Beyond Annotation: Case Studies in Benchmark Data Collection",
    author = "Suhr, Alane and
      Vania, Clara and
      Nangia, Nikita and
      Sap, Maarten and
      Yatskar, Mark and
      Bowman, Samuel R. and
      Artzi, Yoav",
    editor = "Jiang, Jing and
      Vuli{\'c}, Ivan",
    booktitle = "Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing: Tutorial Abstracts",
    month = nov,
    year = "2021",
    address = "Punta Cana, Dominican Republic {\&} Online",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2021.emnlp-tutorials.1",
    doi = "10.18653/v1/2021.emnlp-tutorials.1",
    pages = "1--6",
    abstract = "Crowdsourcing from non-experts is one of the most common approaches to collecting data and annotations in NLP. Even though it is such a fundamental tool in NLP, crowdsourcing use is largely guided by common practices and the personal experience of researchers. Developing a theory of crowdsourcing use for practical language problems remains an open challenge. However, there are various principles and practices that have proven effective in generating high quality and diverse data. This tutorial exposes NLP researchers to such data collection crowdsourcing methods and principles through a detailed discussion of a diverse set of case studies. The selection of case studies focuses on challenging settings where crowdworkers are asked to write original text or otherwise perform relatively unconstrained work. Through these case studies, we discuss in detail processes that were carefully designed to achieve data with specific properties, for example to require logical inference, grounded reasoning or conversational understanding. Each case study focuses on data collection crowdsourcing protocol details that often receive limited attention in research presentations, for example in conferences, but are critical for research success.",
}
MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="suhr-etal-2021-crowdsourcing">
    <titleInfo>
      <title>Crowdsourcing Beyond Annotation: Case Studies in Benchmark Data Collection</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Alane</namePart>
      <namePart type="family">Suhr</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Clara</namePart>
      <namePart type="family">Vania</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Nikita</namePart>
      <namePart type="family">Nangia</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Maarten</namePart>
      <namePart type="family">Sap</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Mark</namePart>
      <namePart type="family">Yatskar</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Samuel</namePart>
      <namePart type="given">R</namePart>
      <namePart type="family">Bowman</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yoav</namePart>
      <namePart type="family">Artzi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2021-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing: Tutorial Abstracts</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Jing</namePart>
        <namePart type="family">Jiang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ivan</namePart>
        <namePart type="family">Vulić</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Punta Cana, Dominican Republic &amp; Online</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Crowdsourcing from non-experts is one of the most common approaches to collecting data and annotations in NLP. Even though it is such a fundamental tool in NLP, crowdsourcing use is largely guided by common practices and the personal experience of researchers. Developing a theory of crowdsourcing use for practical language problems remains an open challenge. However, there are various principles and practices that have proven effective in generating high quality and diverse data. This tutorial exposes NLP researchers to such data collection crowdsourcing methods and principles through a detailed discussion of a diverse set of case studies. The selection of case studies focuses on challenging settings where crowdworkers are asked to write original text or otherwise perform relatively unconstrained work. Through these case studies, we discuss in detail processes that were carefully designed to achieve data with specific properties, for example to require logical inference, grounded reasoning or conversational understanding. Each case study focuses on data collection crowdsourcing protocol details that often receive limited attention in research presentations, for example in conferences, but are critical for research success.</abstract>
    <identifier type="citekey">suhr-etal-2021-crowdsourcing</identifier>
    <identifier type="doi">10.18653/v1/2021.emnlp-tutorials.1</identifier>
    <location>
      <url>https://aclanthology.org/2021.emnlp-tutorials.1</url>
    </location>
    <part>
      <date>2021-11</date>
      <extent unit="page">
        <start>1</start>
        <end>6</end>
      </extent>
    </part>
  </mods>
</modsCollection>
Endnote
%0 Conference Proceedings
%T Crowdsourcing Beyond Annotation: Case Studies in Benchmark Data Collection
%A Suhr, Alane
%A Vania, Clara
%A Nangia, Nikita
%A Sap, Maarten
%A Yatskar, Mark
%A Bowman, Samuel R.
%A Artzi, Yoav
%Y Jiang, Jing
%Y Vulić, Ivan
%S Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing: Tutorial Abstracts
%D 2021
%8 November
%I Association for Computational Linguistics
%C Punta Cana, Dominican Republic & Online
%F suhr-etal-2021-crowdsourcing
%X Crowdsourcing from non-experts is one of the most common approaches to collecting data and annotations in NLP. Even though it is such a fundamental tool in NLP, crowdsourcing use is largely guided by common practices and the personal experience of researchers. Developing a theory of crowdsourcing use for practical language problems remains an open challenge. However, there are various principles and practices that have proven effective in generating high quality and diverse data. This tutorial exposes NLP researchers to such data collection crowdsourcing methods and principles through a detailed discussion of a diverse set of case studies. The selection of case studies focuses on challenging settings where crowdworkers are asked to write original text or otherwise perform relatively unconstrained work. Through these case studies, we discuss in detail processes that were carefully designed to achieve data with specific properties, for example to require logical inference, grounded reasoning or conversational understanding. Each case study focuses on data collection crowdsourcing protocol details that often receive limited attention in research presentations, for example in conferences, but are critical for research success.
%R 10.18653/v1/2021.emnlp-tutorials.1
%U https://aclanthology.org/2021.emnlp-tutorials.1
%U https://doi.org/10.18653/v1/2021.emnlp-tutorials.1
%P 1-6
Markdown (Informal)
[Crowdsourcing Beyond Annotation: Case Studies in Benchmark Data Collection](https://aclanthology.org/2021.emnlp-tutorials.1) (Suhr et al., EMNLP 2021)
ACL
Alane Suhr, Clara Vania, Nikita Nangia, Maarten Sap, Mark Yatskar, Samuel R. Bowman, and Yoav Artzi. 2021. Crowdsourcing Beyond Annotation: Case Studies in Benchmark Data Collection. In Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing: Tutorial Abstracts, pages 1–6, Punta Cana, Dominican Republic & Online. Association for Computational Linguistics.