@inproceedings{b-etal-2021-overview,
title = "An Overview of Fairness in Data {--} Illuminating the Bias in Data Pipeline",
author = "B, Senthil Kumar and
Chandrabose, Aravindan and
Chakravarthi, Bharathi Raja",
editor = "Chakravarthi, Bharathi Raja and
McCrae, John P. and
Zarrouk, Manel and
Bali, Kalika and
Buitelaar, Paul",
booktitle = "Proceedings of the First Workshop on Language Technology for Equality, Diversity and Inclusion",
month = apr,
year = "2021",
address = "Kyiv",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.ltedi-1.5",
pages = "34--45",
abstract = "Data in general encodes human biases by default; being aware of this is a good start, and the research around how to handle it is ongoing. The term {`}bias{'} is extensively used in various contexts in NLP systems. In our research the focus is specific to biases such as gender, racism, religion, demographic and other intersectional views on biases that prevail in text processing systems responsible for systematically discriminating specific population, which is not ethical in NLP. These biases exacerbate the lack of equality, diversity and inclusion of specific population while utilizing the NLP applications. The tools and technology at the intermediate level utilize biased data, and transfer or amplify this bias to the downstream applications. However, it is not enough to be colourblind, gender-neutral alone when designing a unbiased technology {--} instead, we should take a conscious effort by designing a unified framework to measure and benchmark the bias. In this paper, we recommend six measures and one augment measure based on the observations of the bias in data, annotations, text representations and debiasing techniques.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="b-etal-2021-overview">
    <titleInfo>
      <title>An Overview of Fairness in Data – Illuminating the Bias in Data Pipeline</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Senthil</namePart>
      <namePart type="given">Kumar</namePart>
      <namePart type="family">B</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Aravindan</namePart>
      <namePart type="family">Chandrabose</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Bharathi</namePart>
      <namePart type="given">Raja</namePart>
      <namePart type="family">Chakravarthi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2021-04</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the First Workshop on Language Technology for Equality, Diversity and Inclusion</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Bharathi</namePart>
        <namePart type="given">Raja</namePart>
        <namePart type="family">Chakravarthi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">John</namePart>
        <namePart type="given">P</namePart>
        <namePart type="family">McCrae</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Manel</namePart>
        <namePart type="family">Zarrouk</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Kalika</namePart>
        <namePart type="family">Bali</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Paul</namePart>
        <namePart type="family">Buitelaar</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Kyiv</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Data in general encodes human biases by default; being aware of this is a good start, and research on how to handle it is ongoing. The term ‘bias’ is used extensively in various contexts in NLP systems. Our research focuses specifically on biases such as gender, race, religion, demographic and other intersectional biases that prevail in text processing systems and systematically discriminate against specific populations, which is not ethical in NLP. These biases exacerbate the lack of equality, diversity and inclusion for specific populations using NLP applications. Tools and technology at the intermediate level use biased data and transfer or amplify this bias to downstream applications. However, it is not enough to be colourblind or gender-neutral alone when designing unbiased technology; instead, we should make a conscious effort to design a unified framework to measure and benchmark the bias. In this paper, we recommend six measures and one augmentation measure based on observations of bias in data, annotations, text representations and debiasing techniques.</abstract>
    <identifier type="citekey">b-etal-2021-overview</identifier>
    <location>
      <url>https://aclanthology.org/2021.ltedi-1.5</url>
    </location>
    <part>
      <date>2021-04</date>
      <extent unit="page">
        <start>34</start>
        <end>45</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T An Overview of Fairness in Data – Illuminating the Bias in Data Pipeline
%A B, Senthil Kumar
%A Chandrabose, Aravindan
%A Chakravarthi, Bharathi Raja
%Y Chakravarthi, Bharathi Raja
%Y McCrae, John P.
%Y Zarrouk, Manel
%Y Bali, Kalika
%Y Buitelaar, Paul
%S Proceedings of the First Workshop on Language Technology for Equality, Diversity and Inclusion
%D 2021
%8 April
%I Association for Computational Linguistics
%C Kyiv
%F b-etal-2021-overview
%X Data in general encodes human biases by default; being aware of this is a good start, and research on how to handle it is ongoing. The term ‘bias’ is used extensively in various contexts in NLP systems. Our research focuses specifically on biases such as gender, race, religion, demographic and other intersectional biases that prevail in text processing systems and systematically discriminate against specific populations, which is not ethical in NLP. These biases exacerbate the lack of equality, diversity and inclusion for specific populations using NLP applications. Tools and technology at the intermediate level use biased data and transfer or amplify this bias to downstream applications. However, it is not enough to be colourblind or gender-neutral alone when designing unbiased technology; instead, we should make a conscious effort to design a unified framework to measure and benchmark the bias. In this paper, we recommend six measures and one augmentation measure based on observations of bias in data, annotations, text representations and debiasing techniques.
%U https://aclanthology.org/2021.ltedi-1.5
%P 34-45
Markdown (Informal)
[An Overview of Fairness in Data – Illuminating the Bias in Data Pipeline](https://aclanthology.org/2021.ltedi-1.5) (B et al., LTEDI 2021)
ACL
Senthil Kumar B, Aravindan Chandrabose, and Bharathi Raja Chakravarthi. 2021. An Overview of Fairness in Data – Illuminating the Bias in Data Pipeline. In Proceedings of the First Workshop on Language Technology for Equality, Diversity and Inclusion, pages 34–45, Kyiv. Association for Computational Linguistics.