@inproceedings{molteni-etal-2020-service,
title = "Service registration chatbot: collecting and comparing dialogues from {AMT} workers and service{'}s users",
author = "Molteni, Luca and
Singh, Mittul and
Leinonen, Juho and
Leino, Katri and
Kurimo, Mikko and
Della Valle, Emanuele",
editor = "Xu, Wei and
Ritter, Alan and
Baldwin, Tim and
Rahimi, Afshin",
booktitle = "Proceedings of the Sixth Workshop on Noisy User-generated Text (W-NUT 2020)",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.wnut-1.16",
doi = "10.18653/v1/2020.wnut-1.16",
pages = "116--121",
abstract = "Crowdsourcing is the go-to solution for data collection and annotation in the context of NLP tasks. Nevertheless, crowdsourced data is noisy by nature; the source is often unknown and additional validation work is performed to guarantee the dataset{'}s quality. In this article, we compare two crowdsourcing sources on a dialogue paraphrasing task revolving around a chatbot service. We observe that workers hired on crowdsourcing platforms produce lexically poorer and less diverse rewrites than service users engaged voluntarily. Notably enough, on dialogue clarity and optimality, the two paraphrase sources{'} human-perceived quality does not differ significantly. Furthermore, for the chatbot service, the combined crowdsourced data is enough to train a transformer-based Natural Language Generation (NLG) system. To enable similar services, we also release tools for collecting data and training the dialogue-act-based transformer-based NLG module.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="molteni-etal-2020-service">
<titleInfo>
<title>Service registration chatbot: collecting and comparing dialogues from AMT workers and service’s users</title>
</titleInfo>
<name type="personal">
<namePart type="given">Luca</namePart>
<namePart type="family">Molteni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mittul</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juho</namePart>
<namePart type="family">Leinonen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Katri</namePart>
<namePart type="family">Leino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mikko</namePart>
<namePart type="family">Kurimo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Emanuele</namePart>
<namePart type="family">Della Valle</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Sixth Workshop on Noisy User-generated Text (W-NUT 2020)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wei</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alan</namePart>
<namePart type="family">Ritter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tim</namePart>
<namePart type="family">Baldwin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Afshin</namePart>
<namePart type="family">Rahimi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Crowdsourcing is the go-to solution for data collection and annotation in the context of NLP tasks. Nevertheless, crowdsourced data is noisy by nature; the source is often unknown and additional validation work is performed to guarantee the dataset’s quality. In this article, we compare two crowdsourcing sources on a dialogue paraphrasing task revolving around a chatbot service. We observe that workers hired on crowdsourcing platforms produce lexically poorer and less diverse rewrites than service users engaged voluntarily. Notably enough, on dialogue clarity and optimality, the two paraphrase sources’ human-perceived quality does not differ significantly. Furthermore, for the chatbot service, the combined crowdsourced data is enough to train a transformer-based Natural Language Generation (NLG) system. To enable similar services, we also release tools for collecting data and training the dialogue-act-based transformer-based NLG module.</abstract>
<identifier type="citekey">molteni-etal-2020-service</identifier>
<identifier type="doi">10.18653/v1/2020.wnut-1.16</identifier>
<location>
<url>https://aclanthology.org/2020.wnut-1.16</url>
</location>
<part>
<date>2020-11</date>
<extent unit="page">
<start>116</start>
<end>121</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Service registration chatbot: collecting and comparing dialogues from AMT workers and service’s users
%A Molteni, Luca
%A Singh, Mittul
%A Leinonen, Juho
%A Leino, Katri
%A Kurimo, Mikko
%A Della Valle, Emanuele
%Y Xu, Wei
%Y Ritter, Alan
%Y Baldwin, Tim
%Y Rahimi, Afshin
%S Proceedings of the Sixth Workshop on Noisy User-generated Text (W-NUT 2020)
%D 2020
%8 November
%I Association for Computational Linguistics
%C Online
%F molteni-etal-2020-service
%X Crowdsourcing is the go-to solution for data collection and annotation in the context of NLP tasks. Nevertheless, crowdsourced data is noisy by nature; the source is often unknown and additional validation work is performed to guarantee the dataset’s quality. In this article, we compare two crowdsourcing sources on a dialogue paraphrasing task revolving around a chatbot service. We observe that workers hired on crowdsourcing platforms produce lexically poorer and less diverse rewrites than service users engaged voluntarily. Notably enough, on dialogue clarity and optimality, the two paraphrase sources’ human-perceived quality does not differ significantly. Furthermore, for the chatbot service, the combined crowdsourced data is enough to train a transformer-based Natural Language Generation (NLG) system. To enable similar services, we also release tools for collecting data and training the dialogue-act-based transformer-based NLG module.
%R 10.18653/v1/2020.wnut-1.16
%U https://aclanthology.org/2020.wnut-1.16
%U https://doi.org/10.18653/v1/2020.wnut-1.16
%P 116-121
Markdown (Informal)
[Service registration chatbot: collecting and comparing dialogues from AMT workers and service’s users](https://aclanthology.org/2020.wnut-1.16) (Molteni et al., WNUT 2020)
ACL