@inproceedings{pranav-etal-2025-glitter,
title = "Glitter: A Multi-Sentence, Multi-Reference Benchmark for Gender-Fair {G}erman Machine Translation",
author = "Pranav, A and
Hackenbuchner, Jani{\c{c}}a and
Attanasio, Giuseppe and
Lardelli, Manuel and
Lauscher, Anne",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2025",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-emnlp.1002/",
pages = "18450--18477",
ISBN = "979-8-89176-335-7",
abstract = "Machine translation (MT) research addressing gender inclusivity has gained attention for promoting non-exclusionary language representing all genders. However, existing resources are limited in size, most often consisting of single sentences, or single gender-fair formulation types, leaving questions about MT models' ability to use context and diverse inclusive forms. We introduce Glitter, an English-German benchmark featuring extended passages with professional translations implementing three gender-fair alternatives: neutral rewording, typographical solutions (gender star), and neologistic forms (-ens forms). Our experiments reveal significant limitations in state-of-the-art language models, which default to masculine generics, struggle to interpret explicit gender cues in context, and rarely produce gender-fair translations. Through a systematic prompting analysis designed to elicit fair language, we demonstrate that these limitations stem from models' fundamental misunderstanding of gender phenomena, as they fail to implement inclusive forms even when explicitly instructed. Glitter establishes a challenging benchmark, advancing research in gender-fair English-German MT. It highlights substantial room for improvement among leading models and can guide the development of future MT models capable of accurately representing gender diversity."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="pranav-etal-2025-glitter">
<titleInfo>
<title>Glitter: A Multi-Sentence, Multi-Reference Benchmark for Gender-Fair German Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">A</namePart>
<namePart type="family">Pranav</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Janiça</namePart>
<namePart type="family">Hackenbuchner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Giuseppe</namePart>
<namePart type="family">Attanasio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Manuel</namePart>
<namePart type="family">Lardelli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anne</namePart>
<namePart type="family">Lauscher</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2025</title>
</titleInfo>
<name type="personal">
<namePart type="given">Christos</namePart>
<namePart type="family">Christodoulopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tanmoy</namePart>
<namePart type="family">Chakraborty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carolyn</namePart>
<namePart type="family">Rose</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Violet</namePart>
<namePart type="family">Peng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-335-7</identifier>
</relatedItem>
<abstract>Machine translation (MT) research addressing gender inclusivity has gained attention for promoting non-exclusionary language representing all genders. However, existing resources are limited in size, most often consisting of single sentences, or single gender-fair formulation types, leaving questions about MT models’ ability to use context and diverse inclusive forms. We introduce Glitter, an English-German benchmark featuring extended passages with professional translations implementing three gender-fair alternatives: neutral rewording, typographical solutions (gender star), and neologistic forms (-ens forms). Our experiments reveal significant limitations in state-of-the-art language models, which default to masculine generics, struggle to interpret explicit gender cues in context, and rarely produce gender-fair translations. Through a systematic prompting analysis designed to elicit fair language, we demonstrate that these limitations stem from models’ fundamental misunderstanding of gender phenomena, as they fail to implement inclusive forms even when explicitly instructed. Glitter establishes a challenging benchmark, advancing research in gender-fair English-German MT. It highlights substantial room for improvement among leading models and can guide the development of future MT models capable of accurately representing gender diversity.</abstract>
<identifier type="citekey">pranav-etal-2025-glitter</identifier>
<location>
<url>https://aclanthology.org/2025.findings-emnlp.1002/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>18450</start>
<end>18477</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Glitter: A Multi-Sentence, Multi-Reference Benchmark for Gender-Fair German Machine Translation
%A Pranav, A.
%A Hackenbuchner, Janiça
%A Attanasio, Giuseppe
%A Lardelli, Manuel
%A Lauscher, Anne
%Y Christodoulopoulos, Christos
%Y Chakraborty, Tanmoy
%Y Rose, Carolyn
%Y Peng, Violet
%S Findings of the Association for Computational Linguistics: EMNLP 2025
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-335-7
%F pranav-etal-2025-glitter
%X Machine translation (MT) research addressing gender inclusivity has gained attention for promoting non-exclusionary language representing all genders. However, existing resources are limited in size, most often consisting of single sentences, or single gender-fair formulation types, leaving questions about MT models’ ability to use context and diverse inclusive forms. We introduce Glitter, an English-German benchmark featuring extended passages with professional translations implementing three gender-fair alternatives: neutral rewording, typographical solutions (gender star), and neologistic forms (-ens forms). Our experiments reveal significant limitations in state-of-the-art language models, which default to masculine generics, struggle to interpret explicit gender cues in context, and rarely produce gender-fair translations. Through a systematic prompting analysis designed to elicit fair language, we demonstrate that these limitations stem from models’ fundamental misunderstanding of gender phenomena, as they fail to implement inclusive forms even when explicitly instructed. Glitter establishes a challenging benchmark, advancing research in gender-fair English-German MT. It highlights substantial room for improvement among leading models and can guide the development of future MT models capable of accurately representing gender diversity.
%U https://aclanthology.org/2025.findings-emnlp.1002/
%P 18450-18477
Markdown (Informal)
[Glitter: A Multi-Sentence, Multi-Reference Benchmark for Gender-Fair German Machine Translation](https://aclanthology.org/2025.findings-emnlp.1002/) (Pranav et al., Findings 2025)
ACL