@inproceedings{hitti-etal-2019-proposed,
title = "Proposed Taxonomy for Gender Bias in Text; A Filtering Methodology for the Gender Generalization Subtype",
author = "Hitti, Yasmeen and
Jang, Eunbee and
Moreno, Ines and
Pelletier, Carolyne",
editor = "Costa-juss{\`a}, Marta R. and
Hardmeier, Christian and
Radford, Will and
Webster, Kellie",
booktitle = "Proceedings of the First Workshop on Gender Bias in Natural Language Processing",
month = aug,
year = "2019",
address = "Florence, Italy",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W19-3802/",
doi = "10.18653/v1/W19-3802",
pages = "8--17",
abstract = "The purpose of this paper is to present an empirical study on gender bias in text. Current research in this field is focused on detecting and correcting for gender bias in existing machine learning models rather than approaching the issue at the dataset level. The underlying motivation is to create a dataset which could enable machines to learn to differentiate bias writing from non-bias writing. A taxonomy is proposed for structural and contextual gender biases which can manifest themselves in text. A methodology is proposed to fetch one type of structural gender bias, Gender Generalization. We explore the IMDB movie review dataset and 9 different corpora from Project Gutenberg. By filtering out irrelevant sentences, the remaining pool of candidate sentences are sent for human validation. A total of 6123 judgments are made on 1627 sentences and after a quality check on randomly selected sentences we obtain an accuracy of 75{\%}. Out of the 1627 sentences, 808 sentence were labeled as Gender Generalizations. The inter-rater reliability amongst labelers was of 61.14{\%}."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hitti-etal-2019-proposed">
<titleInfo>
<title>Proposed Taxonomy for Gender Bias in Text; A Filtering Methodology for the Gender Generalization Subtype</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yasmeen</namePart>
<namePart type="family">Hitti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eunbee</namePart>
<namePart type="family">Jang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ines</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carolyne</namePart>
<namePart type="family">Pelletier</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Gender Bias in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marta</namePart>
<namePart type="given">R</namePart>
<namePart type="family">Costa-jussà</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christian</namePart>
<namePart type="family">Hardmeier</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Will</namePart>
<namePart type="family">Radford</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kellie</namePart>
<namePart type="family">Webster</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Florence, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The purpose of this paper is to present an empirical study on gender bias in text. Current research in this field is focused on detecting and correcting for gender bias in existing machine learning models rather than approaching the issue at the dataset level. The underlying motivation is to create a dataset which could enable machines to learn to differentiate bias writing from non-bias writing. A taxonomy is proposed for structural and contextual gender biases which can manifest themselves in text. A methodology is proposed to fetch one type of structural gender bias, Gender Generalization. We explore the IMDB movie review dataset and 9 different corpora from Project Gutenberg. By filtering out irrelevant sentences, the remaining pool of candidate sentences are sent for human validation. A total of 6123 judgments are made on 1627 sentences and after a quality check on randomly selected sentences we obtain an accuracy of 75%. Out of the 1627 sentences, 808 sentence were labeled as Gender Generalizations. The inter-rater reliability amongst labelers was of 61.14%.</abstract>
<identifier type="citekey">hitti-etal-2019-proposed</identifier>
<identifier type="doi">10.18653/v1/W19-3802</identifier>
<location>
<url>https://aclanthology.org/W19-3802/</url>
</location>
<part>
<date>2019-08</date>
<extent unit="page">
<start>8</start>
<end>17</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Proposed Taxonomy for Gender Bias in Text; A Filtering Methodology for the Gender Generalization Subtype
%A Hitti, Yasmeen
%A Jang, Eunbee
%A Moreno, Ines
%A Pelletier, Carolyne
%Y Costa-jussà, Marta R.
%Y Hardmeier, Christian
%Y Radford, Will
%Y Webster, Kellie
%S Proceedings of the First Workshop on Gender Bias in Natural Language Processing
%D 2019
%8 August
%I Association for Computational Linguistics
%C Florence, Italy
%F hitti-etal-2019-proposed
%X The purpose of this paper is to present an empirical study on gender bias in text. Current research in this field is focused on detecting and correcting for gender bias in existing machine learning models rather than approaching the issue at the dataset level. The underlying motivation is to create a dataset which could enable machines to learn to differentiate bias writing from non-bias writing. A taxonomy is proposed for structural and contextual gender biases which can manifest themselves in text. A methodology is proposed to fetch one type of structural gender bias, Gender Generalization. We explore the IMDB movie review dataset and 9 different corpora from Project Gutenberg. By filtering out irrelevant sentences, the remaining pool of candidate sentences are sent for human validation. A total of 6123 judgments are made on 1627 sentences and after a quality check on randomly selected sentences we obtain an accuracy of 75%. Out of the 1627 sentences, 808 sentence were labeled as Gender Generalizations. The inter-rater reliability amongst labelers was of 61.14%.
%R 10.18653/v1/W19-3802
%U https://aclanthology.org/W19-3802/
%U https://doi.org/10.18653/v1/W19-3802
%P 8-17
Markdown (Informal)
[Proposed Taxonomy for Gender Bias in Text; A Filtering Methodology for the Gender Generalization Subtype](https://aclanthology.org/W19-3802/) (Hitti et al., GeBNLP 2019)
ACL