@inproceedings{popovic-lapshinova-koltunski-2025-gender1person,
title = "{GENDER}1{PERSON}: Test Suite for Estimating Gender Bias of First-person Singular Forms",
author = "Popovi{\'c}, Maja and
Lapshinova-Koltunski, Ekaterina",
editor = "Haddow, Barry and
Kocmi, Tom and
Koehn, Philipp and
Monz, Christof",
booktitle = "Proceedings of the Tenth Conference on Machine Translation",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.wmt-1.56/",
pages = "800--822",
ISBN = "979-8-89176-341-8",
abstract = "The gender1person test suite is designed to measure gender bias in translating singular first-person forms from English into two Slavic languages, Russian and Serbian. The test suite consists of 1,000 Amazon product reviews, uniformly distributed over 10 different product categories. Bias is measured through a gender score ranging from -100 (all reviews are feminine) to 100 (all reviews are masculine). The test suite shows that the majority of the systems participating in the WMT-2025 task for these two target languages prefer the masculine writer{'}s gender. There is no single system which is biased towards the feminine variant. Furthermore, for each language pair, there are seven systems that are considered balanced, having the gender scores between -10 and 10.Finally, the analysis of different products showed that the choice of the writer{'}s gender depends to a large extent on the product. Moreover, it is demonstrated that even the systems with overall balanced scores are actually biased, but in different ways for different product categories."
}

<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="popovic-lapshinova-koltunski-2025-gender1person">
  <titleInfo>
    <title>GENDER1PERSON: Test Suite for Estimating Gender Bias of First-person Singular Forms</title>
  </titleInfo>
  <name type="personal">
    <namePart type="given">Maja</namePart>
    <namePart type="family">Popović</namePart>
    <role>
      <roleTerm authority="marcrelator" type="text">author</roleTerm>
    </role>
  </name>
  <name type="personal">
    <namePart type="given">Ekaterina</namePart>
    <namePart type="family">Lapshinova-Koltunski</namePart>
    <role>
      <roleTerm authority="marcrelator" type="text">author</roleTerm>
    </role>
  </name>
  <originInfo>
    <dateIssued>2025-11</dateIssued>
  </originInfo>
  <typeOfResource>text</typeOfResource>
  <relatedItem type="host">
    <titleInfo>
      <title>Proceedings of the Tenth Conference on Machine Translation</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Barry</namePart>
      <namePart type="family">Haddow</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">editor</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Tom</namePart>
      <namePart type="family">Kocmi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">editor</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Philipp</namePart>
      <namePart type="family">Koehn</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">editor</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Christof</namePart>
      <namePart type="family">Monz</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">editor</roleTerm>
      </role>
    </name>
    <originInfo>
      <publisher>Association for Computational Linguistics</publisher>
      <place>
        <placeTerm type="text">Suzhou, China</placeTerm>
      </place>
    </originInfo>
    <genre authority="marcgt">conference publication</genre>
    <identifier type="isbn">979-8-89176-341-8</identifier>
  </relatedItem>
  <abstract>The gender1person test suite is designed to measure gender bias in translating singular first-person forms from English into two Slavic languages, Russian and Serbian. The test suite consists of 1,000 Amazon product reviews, uniformly distributed over 10 different product categories. Bias is measured through a gender score ranging from -100 (all reviews are feminine) to 100 (all reviews are masculine). The test suite shows that the majority of the systems participating in the WMT-2025 task for these two target languages prefer the masculine writer’s gender. There is no single system which is biased towards the feminine variant. Furthermore, for each language pair, there are seven systems that are considered balanced, having the gender scores between -10 and 10. Finally, the analysis of different products showed that the choice of the writer’s gender depends to a large extent on the product. Moreover, it is demonstrated that even the systems with overall balanced scores are actually biased, but in different ways for different product categories.</abstract>
  <identifier type="citekey">popovic-lapshinova-koltunski-2025-gender1person</identifier>
  <location>
    <url>https://aclanthology.org/2025.wmt-1.56/</url>
  </location>
  <part>
    <date>2025-11</date>
    <extent unit="page">
      <start>800</start>
      <end>822</end>
    </extent>
  </part>
</mods>
</modsCollection>

%0 Conference Proceedings
%T GENDER1PERSON: Test Suite for Estimating Gender Bias of First-person Singular Forms
%A Popović, Maja
%A Lapshinova-Koltunski, Ekaterina
%Y Haddow, Barry
%Y Kocmi, Tom
%Y Koehn, Philipp
%Y Monz, Christof
%S Proceedings of the Tenth Conference on Machine Translation
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-341-8
%F popovic-lapshinova-koltunski-2025-gender1person
%X The gender1person test suite is designed to measure gender bias in translating singular first-person forms from English into two Slavic languages, Russian and Serbian. The test suite consists of 1,000 Amazon product reviews, uniformly distributed over 10 different product categories. Bias is measured through a gender score ranging from -100 (all reviews are feminine) to 100 (all reviews are masculine). The test suite shows that the majority of the systems participating in the WMT-2025 task for these two target languages prefer the masculine writer’s gender. There is no single system which is biased towards the feminine variant. Furthermore, for each language pair, there are seven systems that are considered balanced, having the gender scores between -10 and 10. Finally, the analysis of different products showed that the choice of the writer’s gender depends to a large extent on the product. Moreover, it is demonstrated that even the systems with overall balanced scores are actually biased, but in different ways for different product categories.
%U https://aclanthology.org/2025.wmt-1.56/
%P 800-822

Markdown (Informal)
[GENDER1PERSON: Test Suite for Estimating Gender Bias of First-person Singular Forms](https://aclanthology.org/2025.wmt-1.56/) (Popović & Lapshinova-Koltunski, WMT 2025)
ACL
Maja Popović and Ekaterina Lapshinova-Koltunski. 2025. GENDER1PERSON: Test Suite for Estimating Gender Bias of First-person Singular Forms. In Proceedings of the Tenth Conference on Machine Translation, pages 800–822, Suzhou, China. Association for Computational Linguistics.
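
The abstract describes a gender score ranging from -100 (all reviews feminine) to 100 (all reviews masculine), with systems scoring between -10 and 10 counted as balanced. As a minimal sketch of how such a score might be computed, assuming it is the normalized difference between masculine and feminine review counts (the abstract does not give the exact formula, so `gender_score`, `is_balanced`, and the label names below are hypothetical):

```python
def gender_score(labels):
    """Hypothetical gender score per the abstract's description.

    labels: one 'masculine' or 'feminine' label per translated review.
    Returns 100 * (masc - fem) / total, i.e. -100 if all reviews are
    feminine and 100 if all are masculine. The paper's actual formula
    is not given in the abstract; this normalization is an assumption.
    """
    n_masc = sum(1 for label in labels if label == "masculine")
    n_fem = sum(1 for label in labels if label == "feminine")
    return 100.0 * (n_masc - n_fem) / len(labels)

def is_balanced(score):
    # The abstract treats scores between -10 and 10 as balanced.
    return -10 <= score <= 10

# Example: 600 masculine and 400 feminine translations out of 1,000
# reviews yield a score of 20, a masculine-leaning (not balanced) system.
example = ["masculine"] * 600 + ["feminine"] * 400
score = gender_score(example)
print(score, is_balanced(score))  # 20.0 False
```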