@inproceedings{sato-miyazawa-2020-quality,
title = "Quality Estimation for Partially Subjective Classification Tasks via Crowdsourcing",
author = "Sato, Yoshinao and
Miyazawa, Kouki",
editor = "Calzolari, Nicoletta and
B{\'e}chet, Fr{\'e}d{\'e}ric and
Blache, Philippe and
Choukri, Khalid and
Cieri, Christopher and
Declerck, Thierry and
Goggi, Sara and
Isahara, Hitoshi and
Maegaard, Bente and
Mariani, Joseph and
Mazo, H{\'e}l{\`e}ne and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Twelfth Language Resources and Evaluation Conference",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2020.lrec-1.29",
pages = "229--235",
abstract = "The quality estimation of artifacts generated by creators via crowdsourcing has great significance for the construction of a large-scale data resource. A common approach to this problem is to ask multiple reviewers to evaluate the same artifacts. However, the commonly used majority voting method to aggregate reviewers{'} evaluations does not work effectively for partially subjective or purely subjective tasks because reviewers{'} sensitivity and bias of evaluation tend to have a wide variety. To overcome this difficulty, we propose a probabilistic model for subjective classification tasks that incorporates the qualities of artifacts as well as the abilities and biases of creators and reviewers as latent variables to be jointly inferred. We applied this method to the partially subjective task of speech classification into the following four attitudes: agreement, disagreement, stalling, and question. The result shows that the proposed method estimates the quality of speech more effectively than a vote aggregation, measured by correlation with a fine-grained classification by experts.",
language = "English",
ISBN = "979-10-95546-34-4",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sato-miyazawa-2020-quality">
<titleInfo>
<title>Quality Estimation for Partially Subjective Classification Tasks via Crowdsourcing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yoshinao</namePart>
<namePart type="family">Sato</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kouki</namePart>
<namePart type="family">Miyazawa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Twelfth Language Resources and Evaluation Conference</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Frédéric</namePart>
<namePart type="family">Béchet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philippe</namePart>
<namePart type="family">Blache</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christopher</namePart>
<namePart type="family">Cieri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Goggi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hitoshi</namePart>
<namePart type="family">Isahara</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hélène</namePart>
<namePart type="family">Mazo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-34-4</identifier>
</relatedItem>
<abstract>The quality estimation of artifacts generated by creators via crowdsourcing has great significance for the construction of a large-scale data resource. A common approach to this problem is to ask multiple reviewers to evaluate the same artifacts. However, the commonly used majority voting method to aggregate reviewers’ evaluations does not work effectively for partially subjective or purely subjective tasks because reviewers’ sensitivity and bias of evaluation tend to have a wide variety. To overcome this difficulty, we propose a probabilistic model for subjective classification tasks that incorporates the qualities of artifacts as well as the abilities and biases of creators and reviewers as latent variables to be jointly inferred. We applied this method to the partially subjective task of speech classification into the following four attitudes: agreement, disagreement, stalling, and question. The result shows that the proposed method estimates the quality of speech more effectively than a vote aggregation, measured by correlation with a fine-grained classification by experts.</abstract>
<identifier type="citekey">sato-miyazawa-2020-quality</identifier>
<location>
<url>https://aclanthology.org/2020.lrec-1.29</url>
</location>
<part>
<date>2020-05</date>
<extent unit="page">
<start>229</start>
<end>235</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Quality Estimation for Partially Subjective Classification Tasks via Crowdsourcing
%A Sato, Yoshinao
%A Miyazawa, Kouki
%Y Calzolari, Nicoletta
%Y Béchet, Frédéric
%Y Blache, Philippe
%Y Choukri, Khalid
%Y Cieri, Christopher
%Y Declerck, Thierry
%Y Goggi, Sara
%Y Isahara, Hitoshi
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Mazo, Hélène
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Twelfth Language Resources and Evaluation Conference
%D 2020
%8 May
%I European Language Resources Association
%C Marseille, France
%@ 979-10-95546-34-4
%G English
%F sato-miyazawa-2020-quality
%X The quality estimation of artifacts generated by creators via crowdsourcing has great significance for the construction of a large-scale data resource. A common approach to this problem is to ask multiple reviewers to evaluate the same artifacts. However, the commonly used majority voting method to aggregate reviewers’ evaluations does not work effectively for partially subjective or purely subjective tasks because reviewers’ sensitivity and bias of evaluation tend to have a wide variety. To overcome this difficulty, we propose a probabilistic model for subjective classification tasks that incorporates the qualities of artifacts as well as the abilities and biases of creators and reviewers as latent variables to be jointly inferred. We applied this method to the partially subjective task of speech classification into the following four attitudes: agreement, disagreement, stalling, and question. The result shows that the proposed method estimates the quality of speech more effectively than a vote aggregation, measured by correlation with a fine-grained classification by experts.
%U https://aclanthology.org/2020.lrec-1.29
%P 229-235
Markdown (Informal)
[Quality Estimation for Partially Subjective Classification Tasks via Crowdsourcing](https://aclanthology.org/2020.lrec-1.29) (Sato & Miyazawa, LREC 2020)
ACL