@inproceedings{al-kuwatly-etal-2020-identifying,
title = "Identifying and Measuring Annotator Bias Based on Annotators' Demographic Characteristics",
author = "Al Kuwatly, Hala and
Wich, Maximilian and
Groh, Georg",
editor = "Akiwowo, Seyi and
Vidgen, Bertie and
Prabhakaran, Vinodkumar and
Waseem, Zeerak",
booktitle = "Proceedings of the Fourth Workshop on Online Abuse and Harms",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.alw-1.21/",
doi = "10.18653/v1/2020.alw-1.21",
pages = "184--190",
abstract = "Machine learning is recently used to detect hate speech and other forms of abusive language in online platforms. However, a notable weakness of machine learning models is their vulnerability to bias, which can impair their performance and fairness. One type is annotator bias caused by the subjective perception of the annotators. In this work, we investigate annotator bias using classification models trained on data from demographically distinct annotator groups. To do so, we sample balanced subsets of data that are labeled by demographically distinct annotators. We then train classifiers on these subsets, analyze their performances on similarly grouped test sets, and compare them statistically. Our findings show that the proposed approach successfully identifies bias and that demographic features, such as first language, age, and education, correlate with significant performance differences."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="al-kuwatly-etal-2020-identifying">
<titleInfo>
<title>Identifying and Measuring Annotator Bias Based on Annotators’ Demographic Characteristics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hala</namePart>
<namePart type="family">Al Kuwatly</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maximilian</namePart>
<namePart type="family">Wich</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Georg</namePart>
<namePart type="family">Groh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fourth Workshop on Online Abuse and Harms</title>
</titleInfo>
<name type="personal">
<namePart type="given">Seyi</namePart>
<namePart type="family">Akiwowo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bertie</namePart>
<namePart type="family">Vidgen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vinodkumar</namePart>
<namePart type="family">Prabhakaran</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zeerak</namePart>
<namePart type="family">Waseem</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Machine learning has recently been used to detect hate speech and other forms of abusive language on online platforms. However, a notable weakness of machine learning models is their vulnerability to bias, which can impair their performance and fairness. One such bias is annotator bias, caused by the subjective perception of the annotators. In this work, we investigate annotator bias using classification models trained on data from demographically distinct annotator groups. To do so, we sample balanced subsets of data that are labeled by demographically distinct annotators. We then train classifiers on these subsets, analyze their performance on similarly grouped test sets, and compare the results statistically. Our findings show that the proposed approach successfully identifies bias and that demographic features such as first language, age, and education correlate with significant performance differences.</abstract>
<identifier type="citekey">al-kuwatly-etal-2020-identifying</identifier>
<identifier type="doi">10.18653/v1/2020.alw-1.21</identifier>
<location>
<url>https://aclanthology.org/2020.alw-1.21/</url>
</location>
<part>
<date>2020-11</date>
<extent unit="page">
<start>184</start>
<end>190</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Identifying and Measuring Annotator Bias Based on Annotators’ Demographic Characteristics
%A Al Kuwatly, Hala
%A Wich, Maximilian
%A Groh, Georg
%Y Akiwowo, Seyi
%Y Vidgen, Bertie
%Y Prabhakaran, Vinodkumar
%Y Waseem, Zeerak
%S Proceedings of the Fourth Workshop on Online Abuse and Harms
%D 2020
%8 November
%I Association for Computational Linguistics
%C Online
%F al-kuwatly-etal-2020-identifying
%X Machine learning has recently been used to detect hate speech and other forms of abusive language on online platforms. However, a notable weakness of machine learning models is their vulnerability to bias, which can impair their performance and fairness. One such bias is annotator bias, caused by the subjective perception of the annotators. In this work, we investigate annotator bias using classification models trained on data from demographically distinct annotator groups. To do so, we sample balanced subsets of data that are labeled by demographically distinct annotators. We then train classifiers on these subsets, analyze their performance on similarly grouped test sets, and compare the results statistically. Our findings show that the proposed approach successfully identifies bias and that demographic features such as first language, age, and education correlate with significant performance differences.
%R 10.18653/v1/2020.alw-1.21
%U https://aclanthology.org/2020.alw-1.21/
%U https://doi.org/10.18653/v1/2020.alw-1.21
%P 184-190
Markdown (Informal)
[Identifying and Measuring Annotator Bias Based on Annotators’ Demographic Characteristics](https://aclanthology.org/2020.alw-1.21/) (Al Kuwatly et al., ALW 2020)
ACL
Hala Al Kuwatly, Maximilian Wich, and Georg Groh. 2020. Identifying and Measuring Annotator Bias Based on Annotators’ Demographic Characteristics. In Proceedings of the Fourth Workshop on Online Abuse and Harms, pages 184–190, Online. Association for Computational Linguistics.
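
The abstract outlines a concrete evaluation protocol: sample balanced subsets labeled by demographically distinct annotators, train one classifier per group, score each on similarly grouped test sets, and compare the scores statistically. Below is a minimal sketch of that protocol, assuming scikit-learn and SciPy; the toy data, the TF-IDF plus logistic-regression model, and Welch's t-test are illustrative stand-ins, not the authors' actual pipeline.

import numpy as np
from scipy.stats import ttest_ind
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split

def run_split(texts, labels, seed):
    """Train on one random split of a group's data; return the test F1."""
    X_tr, X_te, y_tr, y_te = train_test_split(
        texts, labels, test_size=0.3, random_state=seed, stratify=labels)
    vec = TfidfVectorizer()
    clf = LogisticRegression(max_iter=1000)
    clf.fit(vec.fit_transform(X_tr), y_tr)
    return f1_score(y_te, clf.predict(vec.transform(X_te)))

# Fabricated stand-in annotations for two demographic groups (e.g. native
# vs. non-native speakers); real experiments would use the labeled corpus.
rng = np.random.RandomState(0)
texts_a = [f"group a comment number {i}" for i in range(60)]
texts_b = [f"group b comment number {i}" for i in range(60)]
y_a, y_b = rng.randint(0, 2, 60), rng.randint(0, 2, 60)

scores_a = [run_split(texts_a, y_a, seed) for seed in range(10)]
scores_b = [run_split(texts_b, y_b, seed) for seed in range(10)]

# A significant difference between the two score distributions would point
# to annotator bias tied to the demographic attribute used for the split.
stat, p = ttest_ind(scores_a, scores_b, equal_var=False)
print(f"mean F1 A={np.mean(scores_a):.3f}  B={np.mean(scores_b):.3f}  p={p:.3f}")

Repeating the split over several seeds gives each group a distribution of scores rather than a single number, which is what makes the statistical comparison in the final step meaningful.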