@inproceedings{mokhberian-etal-2024-capturing,
    title = "Capturing Perspectives of Crowdsourced Annotators in Subjective Learning Tasks",
    author = "Mokhberian, Negar and
      Marmarelis, Myrl and
      Hopp, Frederic and
      Basile, Valerio and
      Morstatter, Fred and
      Lerman, Kristina",
    editor = "Duh, Kevin and
      Gomez, Helena and
      Bethard, Steven",
    booktitle = "Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)",
    month = jun,
    year = "2024",
    address = "Mexico City, Mexico",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.naacl-long.407",
    doi = "10.18653/v1/2024.naacl-long.407",
    pages = "7337--7349",
    abstract = "Supervised classification heavily depends on datasets annotated by humans. However, in subjective tasks such as toxicity classification, these annotations often exhibit low agreement among raters. Annotations have commonly been aggregated by employing methods like majority voting to determine a single ground truth label. In subjective tasks, aggregating labels will result in biased labeling and, consequently, biased models that can overlook minority opinions. Previous studies have shed light on the pitfalls of label aggregation and have introduced a handful of practical approaches to tackle this issue. Recently proposed multi-annotator models, which predict labels individually per annotator, are vulnerable to under-determination for annotators with few samples. This problem is exacerbated in crowdsourced datasets. In this work, we propose Annotator Aware Representations for Texts (AART) for subjective classification tasks. Our approach involves learning representations of annotators, allowing for exploration of annotation behaviors. We show the improvement of our method on metrics that assess the performance on capturing individual annotators{'} perspectives. Additionally, we demonstrate fairness metrics to evaluate our model{'}s equability of performance for marginalized annotators compared to others.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="mokhberian-etal-2024-capturing">
    <titleInfo>
      <title>Capturing Perspectives of Crowdsourced Annotators in Subjective Learning Tasks</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Negar</namePart>
      <namePart type="family">Mokhberian</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Myrl</namePart>
      <namePart type="family">Marmarelis</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Frederic</namePart>
      <namePart type="family">Hopp</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Valerio</namePart>
      <namePart type="family">Basile</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Fred</namePart>
      <namePart type="family">Morstatter</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Kristina</namePart>
      <namePart type="family">Lerman</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-06</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Kevin</namePart>
        <namePart type="family">Duh</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Helena</namePart>
        <namePart type="family">Gomez</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Steven</namePart>
        <namePart type="family">Bethard</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Mexico City, Mexico</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Supervised classification heavily depends on datasets annotated by humans. However, in subjective tasks such as toxicity classification, these annotations often exhibit low agreement among raters. Annotations have commonly been aggregated by employing methods like majority voting to determine a single ground truth label. In subjective tasks, aggregating labels will result in biased labeling and, consequently, biased models that can overlook minority opinions. Previous studies have shed light on the pitfalls of label aggregation and have introduced a handful of practical approaches to tackle this issue. Recently proposed multi-annotator models, which predict labels individually per annotator, are vulnerable to under-determination for annotators with few samples. This problem is exacerbated in crowdsourced datasets. In this work, we propose Annotator Aware Representations for Texts (AART) for subjective classification tasks. Our approach involves learning representations of annotators, allowing for exploration of annotation behaviors. We show the improvement of our method on metrics that assess the performance on capturing individual annotators’ perspectives. Additionally, we demonstrate fairness metrics to evaluate our model’s equability of performance for marginalized annotators compared to others.</abstract>
    <identifier type="citekey">mokhberian-etal-2024-capturing</identifier>
    <identifier type="doi">10.18653/v1/2024.naacl-long.407</identifier>
    <location>
      <url>https://aclanthology.org/2024.naacl-long.407</url>
    </location>
    <part>
      <date>2024-06</date>
      <extent unit="page">
        <start>7337</start>
        <end>7349</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Capturing Perspectives of Crowdsourced Annotators in Subjective Learning Tasks
%A Mokhberian, Negar
%A Marmarelis, Myrl
%A Hopp, Frederic
%A Basile, Valerio
%A Morstatter, Fred
%A Lerman, Kristina
%Y Duh, Kevin
%Y Gomez, Helena
%Y Bethard, Steven
%S Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F mokhberian-etal-2024-capturing
%X Supervised classification heavily depends on datasets annotated by humans. However, in subjective tasks such as toxicity classification, these annotations often exhibit low agreement among raters. Annotations have commonly been aggregated by employing methods like majority voting to determine a single ground truth label. In subjective tasks, aggregating labels will result in biased labeling and, consequently, biased models that can overlook minority opinions. Previous studies have shed light on the pitfalls of label aggregation and have introduced a handful of practical approaches to tackle this issue. Recently proposed multi-annotator models, which predict labels individually per annotator, are vulnerable to under-determination for annotators with few samples. This problem is exacerbated in crowdsourced datasets. In this work, we propose Annotator Aware Representations for Texts (AART) for subjective classification tasks. Our approach involves learning representations of annotators, allowing for exploration of annotation behaviors. We show the improvement of our method on metrics that assess the performance on capturing individual annotators’ perspectives. Additionally, we demonstrate fairness metrics to evaluate our model’s equability of performance for marginalized annotators compared to others.
%R 10.18653/v1/2024.naacl-long.407
%U https://aclanthology.org/2024.naacl-long.407
%U https://doi.org/10.18653/v1/2024.naacl-long.407
%P 7337-7349
Markdown (Informal)

[Capturing Perspectives of Crowdsourced Annotators in Subjective Learning Tasks](https://aclanthology.org/2024.naacl-long.407) (Mokhberian et al., NAACL 2024)

ACL

Negar Mokhberian, Myrl Marmarelis, Frederic Hopp, Valerio Basile, Fred Morstatter, and Kristina Lerman. 2024. Capturing Perspectives of Crowdsourced Annotators in Subjective Learning Tasks. In Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers), pages 7337–7349, Mexico City, Mexico. Association for Computational Linguistics.
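
The abstract above describes learning annotator representations alongside text representations so that labels can be predicted per annotator rather than for an aggregated "ground truth". As a rough illustration of that idea only (not the authors' model or code; the bag-of-words encoder, embedding size, and combination by addition are assumptions made for the sketch), here is a minimal PyTorch example of a classifier conditioned on a learned annotator embedding:

```python
# Illustrative sketch only: a classifier that adds a learned annotator
# embedding to a text representation before predicting that annotator's label.
# The encoder, dimensions, and training step are assumptions, not the paper's code.
import torch
import torch.nn as nn


class AnnotatorAwareClassifier(nn.Module):
    def __init__(self, vocab_size: int, num_annotators: int, num_classes: int, dim: int = 64):
        super().__init__()
        # Simple bag-of-words text encoder stands in for a pretrained transformer.
        self.token_emb = nn.EmbeddingBag(vocab_size, dim, mode="mean")
        # One learned vector per annotator, combined with the text representation.
        self.annotator_emb = nn.Embedding(num_annotators, dim)
        self.classifier = nn.Linear(dim, num_classes)

    def forward(self, token_ids: torch.Tensor, offsets: torch.Tensor,
                annotator_ids: torch.Tensor) -> torch.Tensor:
        text_vec = self.token_emb(token_ids, offsets)   # (batch, dim)
        ann_vec = self.annotator_emb(annotator_ids)     # (batch, dim)
        return self.classifier(text_vec + ann_vec)      # logits for this annotator


if __name__ == "__main__":
    torch.manual_seed(0)
    model = AnnotatorAwareClassifier(vocab_size=1000, num_annotators=50, num_classes=2)
    # Two toy (text, annotator) pairs: the same text labeled by different annotators
    # can receive different predictions because each annotator has its own embedding.
    token_ids = torch.tensor([3, 17, 256, 3, 17, 256])
    offsets = torch.tensor([0, 3])                      # each example has 3 tokens
    annotator_ids = torch.tensor([4, 23])
    labels = torch.tensor([1, 0])
    logits = model(token_ids, offsets, annotator_ids)
    loss = nn.functional.cross_entropy(logits, labels)
    loss.backward()                                     # standard supervised training step
    print(logits.shape, float(loss))
```

Because the annotator embedding enters the prediction, the same text can receive different labels for different raters, which is the behavior the paper evaluates with per-annotator and fairness metrics; consult the paper itself for the actual architecture and training objective.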