@inproceedings{sewunetie-etal-2024-gender,
title = "Gender Bias Evaluation in Machine Translation for {A}mharic, {T}igrigna, and {A}faan {O}romoo",
author = "Sewunetie, Walelign and
Tonja, Atnafu and
Belay, Tadesse and
Nigatu, Hellina Hailu and
Gebremeskel, Gashaw and
Mossie, Zewdie and
Seid, Hussien and
Yimam, Seid",
editor = "Savoldi, Beatrice and
Hackenbuchner, Jani{\c{c}}a and
Bentivogli, Luisa and
Daems, Joke and
Vanmassenhove, Eva and
Bastings, Jasmijn",
booktitle = "Proceedings of the 2nd International Workshop on Gender-Inclusive Translation Technologies",
month = jun,
year = "2024",
address = "Sheffield, United Kingdom",
publisher = "European Association for Machine Translation (EAMT)",
url = "https://aclanthology.org/2024.gitt-1.1",
pages = "1--11",
abstract = "While Machine Translation (MT) research has progressed over the years, translation systems still suffer from biases, including gender bias. While an active line of research studies the existence and mitigation strategies of gender bias in machine translation systems, there is limited research exploring this phenomenon for low-resource languages. The limited availability of linguistic and computational resources confounded with the lack of benchmark datasets makes studying bias for low-resourced languages that much more difficult. In this paper, we construct benchmark datasets to evaluate gender bias in machine translation for three low-resource languages: Afaan Oromoo (Orm), Amharic (Amh), and Tigrinya (Tir). Building on prior work, we collected 2400 gender-balanced sentences parallelly translated into the three languages. From human evaluations of the dataset we collected, we found that about 93{\%} of Afaan Oromoo, 80{\%} of Tigrinya, and 72{\%} of Amharic sentences exhibited gender bias. In addition to providing benchmarks for improving gender bias mitigation research in the three languages, we hope the careful documentation of our work will help other low-resourced language researchers extend our approach to their languages.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sewunetie-etal-2024-gender">
<titleInfo>
<title>Gender Bias Evaluation in Machine Translation for Amharic, Tigrigna, and Afaan Oromoo</title>
</titleInfo>
<name type="personal">
<namePart type="given">Walelign</namePart>
<namePart type="family">Sewunetie</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Atnafu</namePart>
<namePart type="family">Tonja</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tadesse</namePart>
<namePart type="family">Belay</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hellina</namePart>
<namePart type="given">Hailu</namePart>
<namePart type="family">Nigatu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gashaw</namePart>
<namePart type="family">Gebremeskel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zewdie</namePart>
<namePart type="family">Mossie</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hussien</namePart>
<namePart type="family">Seid</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Seid</namePart>
<namePart type="family">Yimam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2nd International Workshop on Gender-Inclusive Translation Technologies</title>
</titleInfo>
<name type="personal">
<namePart type="given">Beatrice</namePart>
<namePart type="family">Savoldi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Janiça</namePart>
<namePart type="family">Hackenbuchner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luisa</namePart>
<namePart type="family">Bentivogli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joke</namePart>
<namePart type="family">Daems</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eva</namePart>
<namePart type="family">Vanmassenhove</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jasmijn</namePart>
<namePart type="family">Bastings</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Association for Machine Translation (EAMT)</publisher>
<place>
<placeTerm type="text">Sheffield, United Kingdom</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>While Machine Translation (MT) research has progressed over the years, translation systems still suffer from biases, including gender bias. While an active line of research studies the existence and mitigation strategies of gender bias in machine translation systems, there is limited research exploring this phenomenon for low-resource languages. The limited availability of linguistic and computational resources confounded with the lack of benchmark datasets makes studying bias for low-resourced languages that much more difficult. In this paper, we construct benchmark datasets to evaluate gender bias in machine translation for three low-resource languages: Afaan Oromoo (Orm), Amharic (Amh), and Tigrinya (Tir). Building on prior work, we collected 2400 gender-balanced sentences parallelly translated into the three languages. From human evaluations of the dataset we collected, we found that about 93% of Afaan Oromoo, 80% of Tigrinya, and 72% of Amharic sentences exhibited gender bias. In addition to providing benchmarks for improving gender bias mitigation research in the three languages, we hope the careful documentation of our work will help other low-resourced language researchers extend our approach to their languages.</abstract>
<identifier type="citekey">sewunetie-etal-2024-gender</identifier>
<location>
<url>https://aclanthology.org/2024.gitt-1.1</url>
</location>
<part>
<date>2024-06</date>
<extent unit="page">
<start>1</start>
<end>11</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Gender Bias Evaluation in Machine Translation for Amharic, Tigrigna, and Afaan Oromoo
%A Sewunetie, Walelign
%A Tonja, Atnafu
%A Belay, Tadesse
%A Nigatu, Hellina Hailu
%A Gebremeskel, Gashaw
%A Mossie, Zewdie
%A Seid, Hussien
%A Yimam, Seid
%Y Savoldi, Beatrice
%Y Hackenbuchner, Janiça
%Y Bentivogli, Luisa
%Y Daems, Joke
%Y Vanmassenhove, Eva
%Y Bastings, Jasmijn
%S Proceedings of the 2nd International Workshop on Gender-Inclusive Translation Technologies
%D 2024
%8 June
%I European Association for Machine Translation (EAMT)
%C Sheffield, United Kingdom
%F sewunetie-etal-2024-gender
%X While Machine Translation (MT) research has progressed over the years, translation systems still suffer from biases, including gender bias. While an active line of research studies the existence and mitigation strategies of gender bias in machine translation systems, there is limited research exploring this phenomenon for low-resource languages. The limited availability of linguistic and computational resources confounded with the lack of benchmark datasets makes studying bias for low-resourced languages that much more difficult. In this paper, we construct benchmark datasets to evaluate gender bias in machine translation for three low-resource languages: Afaan Oromoo (Orm), Amharic (Amh), and Tigrinya (Tir). Building on prior work, we collected 2400 gender-balanced sentences parallelly translated into the three languages. From human evaluations of the dataset we collected, we found that about 93% of Afaan Oromoo, 80% of Tigrinya, and 72% of Amharic sentences exhibited gender bias. In addition to providing benchmarks for improving gender bias mitigation research in the three languages, we hope the careful documentation of our work will help other low-resourced language researchers extend our approach to their languages.
%U https://aclanthology.org/2024.gitt-1.1
%P 1-11
Markdown (Informal)
[Gender Bias Evaluation in Machine Translation for Amharic, Tigrigna, and Afaan Oromoo](https://aclanthology.org/2024.gitt-1.1) (Sewunetie et al., GITT-WS 2024)
ACL
- Walelign Sewunetie, Atnafu Tonja, Tadesse Belay, Hellina Hailu Nigatu, Gashaw Gebremeskel, Zewdie Mossie, Hussien Seid, and Seid Yimam. 2024. Gender Bias Evaluation in Machine Translation for Amharic, Tigrigna, and Afaan Oromoo. In Proceedings of the 2nd International Workshop on Gender-Inclusive Translation Technologies, pages 1–11, Sheffield, United Kingdom. European Association for Machine Translation (EAMT).