@inproceedings{cui-2023-xiacui,
title = "xiacui at {S}em{E}val-2023 Task 11: Learning a Model in Mixed-Annotator Datasets Using Annotator Ranking Scores as Training Weights",
author = "Cui, Xia",
editor = {Ojha, Atul Kr. and
Do{\u{g}}ru{\"o}z, A. Seza and
Da San Martino, Giovanni and
Tayyar Madabushi, Harish and
Kumar, Ritesh and
Sartori, Elisa},
booktitle = "Proceedings of the 17th International Workshop on Semantic Evaluation (SemEval-2023)",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.semeval-1.148",
doi = "10.18653/v1/2023.semeval-1.148",
pages = "1076--1084",
abstract = "This paper describes the development of a system for SemEval-2023 Shared Task 11 on Learning with Disagreements (Le-Wi-Di). Labelled data plays a vital role in the development of machine learning systems. The human-annotated labels are usually considered the truth for training or validation. To obtain truth labels, a traditional way is to hire domain experts to perform an expensive annotation process. Crowd-sourcing labelling is comparably cheap, whereas it raises a question on the reliability of annotators. A common strategy in a mixed-annotator dataset with various sets of annotators for each instance is to aggregate the labels among multiple groups of annotators to obtain the truth labels. However, these annotators might not reach an agreement, and there is no guarantee of the reliability of these labels either. With further problems caused by human label variation, subjective tasks usually suffer from the different opinions provided by the annotators. In this paper, we propose two simple heuristic functions to compute the annotator ranking scores, namely AnnoHard and AnnoSoft, based on the hard labels (i.e., aggregative labels) and soft labels (i.e., cross-entropy values). By introducing these scores, we adjust the weights of the training instances to improve the learning with disagreements among the annotators.",
}