BibTeX
@inproceedings{kiritchenko-mohammad-2018-examining,
title = "Examining Gender and Race Bias in Two Hundred Sentiment Analysis Systems",
author = "Kiritchenko, Svetlana and
Mohammad, Saif",
editor = "Nissim, Malvina and
Berant, Jonathan and
Lenci, Alessandro",
booktitle = "Proceedings of the Seventh Joint Conference on Lexical and Computational Semantics",
month = jun,
year = "2018",
address = "New Orleans, Louisiana",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/S18-2005",
doi = "10.18653/v1/S18-2005",
pages = "43--53",
abstract = "Automatic machine learning systems can inadvertently accentuate and perpetuate inappropriate human biases. Past work on examining inappropriate biases has largely focused on just individual systems. Further, there is no benchmark dataset for examining inappropriate biases in systems. Here for the first time, we present the Equity Evaluation Corpus (EEC), which consists of 8,640 English sentences carefully chosen to tease out biases towards certain races and genders. We use the dataset to examine 219 automatic sentiment analysis systems that took part in a recent shared task, SemEval-2018 Task 1 {`}Affect in Tweets{'}. We find that several of the systems show statistically significant bias; that is, they consistently provide slightly higher sentiment intensity predictions for one race or one gender. We make the EEC freely available.",
}
MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="kiritchenko-mohammad-2018-examining">
    <titleInfo>
      <title>Examining Gender and Race Bias in Two Hundred Sentiment Analysis Systems</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Svetlana</namePart>
      <namePart type="family">Kiritchenko</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Saif</namePart>
      <namePart type="family">Mohammad</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2018-06</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Seventh Joint Conference on Lexical and Computational Semantics</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Malvina</namePart>
        <namePart type="family">Nissim</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jonathan</namePart>
        <namePart type="family">Berant</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Alessandro</namePart>
        <namePart type="family">Lenci</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">New Orleans, Louisiana</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Automatic machine learning systems can inadvertently accentuate and perpetuate inappropriate human biases. Past work on examining inappropriate biases has largely focused on just individual systems. Further, there is no benchmark dataset for examining inappropriate biases in systems. Here for the first time, we present the Equity Evaluation Corpus (EEC), which consists of 8,640 English sentences carefully chosen to tease out biases towards certain races and genders. We use the dataset to examine 219 automatic sentiment analysis systems that took part in a recent shared task, SemEval-2018 Task 1 ‘Affect in Tweets’. We find that several of the systems show statistically significant bias; that is, they consistently provide slightly higher sentiment intensity predictions for one race or one gender. We make the EEC freely available.</abstract>
    <identifier type="citekey">kiritchenko-mohammad-2018-examining</identifier>
    <identifier type="doi">10.18653/v1/S18-2005</identifier>
    <location>
      <url>https://aclanthology.org/S18-2005</url>
    </location>
    <part>
      <date>2018-06</date>
      <extent unit="page">
        <start>43</start>
        <end>53</end>
      </extent>
    </part>
  </mods>
</modsCollection>
Endnote
%0 Conference Proceedings
%T Examining Gender and Race Bias in Two Hundred Sentiment Analysis Systems
%A Kiritchenko, Svetlana
%A Mohammad, Saif
%Y Nissim, Malvina
%Y Berant, Jonathan
%Y Lenci, Alessandro
%S Proceedings of the Seventh Joint Conference on Lexical and Computational Semantics
%D 2018
%8 June
%I Association for Computational Linguistics
%C New Orleans, Louisiana
%F kiritchenko-mohammad-2018-examining
%X Automatic machine learning systems can inadvertently accentuate and perpetuate inappropriate human biases. Past work on examining inappropriate biases has largely focused on just individual systems. Further, there is no benchmark dataset for examining inappropriate biases in systems. Here for the first time, we present the Equity Evaluation Corpus (EEC), which consists of 8,640 English sentences carefully chosen to tease out biases towards certain races and genders. We use the dataset to examine 219 automatic sentiment analysis systems that took part in a recent shared task, SemEval-2018 Task 1 ‘Affect in Tweets’. We find that several of the systems show statistically significant bias; that is, they consistently provide slightly higher sentiment intensity predictions for one race or one gender. We make the EEC freely available.
%R 10.18653/v1/S18-2005
%U https://aclanthology.org/S18-2005
%U https://doi.org/10.18653/v1/S18-2005
%P 43-53
Markdown (Informal)
[Examining Gender and Race Bias in Two Hundred Sentiment Analysis Systems](https://aclanthology.org/S18-2005) (Kiritchenko & Mohammad, *SEM 2018)
ACL
Svetlana Kiritchenko and Saif Mohammad. 2018. Examining Gender and Race Bias in Two Hundred Sentiment Analysis Systems. In Proceedings of the Seventh Joint Conference on Lexical and Computational Semantics, pages 43–53, New Orleans, Louisiana. Association for Computational Linguistics.