@inproceedings{dycke-etal-2022-yes,
title = "Yes-Yes-Yes: Proactive Data Collection for {ACL} Rolling Review and Beyond",
author = "Dycke, Nils and
Kuznetsov, Ilia and
Gurevych, Iryna",
editor = "Goldberg, Yoav and
Kozareva, Zornitsa and
Zhang, Yue",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2022",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.findings-emnlp.23",
doi = "10.18653/v1/2022.findings-emnlp.23",
pages = "300--318",
abstract = "The shift towards publicly available text sources has enabled language processing at unprecedented scale, yet leaves under-serviced the domains where public and openly licensed data is scarce. Proactively collecting text data for research is a viable strategy to address this scarcity, but lacks systematic methodology taking into account the many ethical, legal and confidentiality-related aspects of data collection. Our work presents a case study on proactive data collection in peer review {--} a challenging and under-resourced NLP domain. We outline ethical and legal desiderata for proactive data collection and introduce {``}Yes-Yes-Yes{''}, the first donation-based peer reviewing data collection workflow that meets these requirements. We report on the implementation of Yes-Yes-Yes at ACL Rolling Review and empirically study the implications of proactive data collection for the dataset size and the biases induced by the donation behavior on the peer reviewing platform.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="dycke-etal-2022-yes">
<titleInfo>
<title>Yes-Yes-Yes: Proactive Data Collection for ACL Rolling Review and Beyond</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nils</namePart>
<namePart type="family">Dycke</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ilia</namePart>
<namePart type="family">Kuznetsov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Iryna</namePart>
<namePart type="family">Gurevych</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2022</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yoav</namePart>
<namePart type="family">Goldberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zornitsa</namePart>
<namePart type="family">Kozareva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yue</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, United Arab Emirates</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The shift towards publicly available text sources has enabled language processing at unprecedented scale, yet leaves under-serviced the domains where public and openly licensed data is scarce. Proactively collecting text data for research is a viable strategy to address this scarcity, but lacks systematic methodology taking into account the many ethical, legal and confidentiality-related aspects of data collection. Our work presents a case study on proactive data collection in peer review – a challenging and under-resourced NLP domain. We outline ethical and legal desiderata for proactive data collection and introduce “Yes-Yes-Yes”, the first donation-based peer reviewing data collection workflow that meets these requirements. We report on the implementation of Yes-Yes-Yes at ACL Rolling Review and empirically study the implications of proactive data collection for the dataset size and the biases induced by the donation behavior on the peer reviewing platform.</abstract>
<identifier type="citekey">dycke-etal-2022-yes</identifier>
<identifier type="doi">10.18653/v1/2022.findings-emnlp.23</identifier>
<location>
<url>https://aclanthology.org/2022.findings-emnlp.23</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>300</start>
<end>318</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Yes-Yes-Yes: Proactive Data Collection for ACL Rolling Review and Beyond
%A Dycke, Nils
%A Kuznetsov, Ilia
%A Gurevych, Iryna
%Y Goldberg, Yoav
%Y Kozareva, Zornitsa
%Y Zhang, Yue
%S Findings of the Association for Computational Linguistics: EMNLP 2022
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates
%F dycke-etal-2022-yes
%X The shift towards publicly available text sources has enabled language processing at unprecedented scale, yet leaves under-serviced the domains where public and openly licensed data is scarce. Proactively collecting text data for research is a viable strategy to address this scarcity, but lacks systematic methodology taking into account the many ethical, legal and confidentiality-related aspects of data collection. Our work presents a case study on proactive data collection in peer review – a challenging and under-resourced NLP domain. We outline ethical and legal desiderata for proactive data collection and introduce “Yes-Yes-Yes”, the first donation-based peer reviewing data collection workflow that meets these requirements. We report on the implementation of Yes-Yes-Yes at ACL Rolling Review and empirically study the implications of proactive data collection for the dataset size and the biases induced by the donation behavior on the peer reviewing platform.
%R 10.18653/v1/2022.findings-emnlp.23
%U https://aclanthology.org/2022.findings-emnlp.23
%U https://doi.org/10.18653/v1/2022.findings-emnlp.23
%P 300-318
Markdown (Informal)
[Yes-Yes-Yes: Proactive Data Collection for ACL Rolling Review and Beyond](https://aclanthology.org/2022.findings-emnlp.23) (Dycke et al., Findings 2022)
ACL