@inproceedings{savoldi-etal-2023-test,
title = "Test Suites Task: Evaluation of Gender Fairness in {MT} with {M}u{ST}-{SHE} and {INES}",
author = "Savoldi, Beatrice and
Gaido, Marco and
Negri, Matteo and
Bentivogli, Luisa",
editor = "Koehn, Philipp and
Haddow, Barry and
Kocmi, Tom and
Monz, Christof",
booktitle = "Proceedings of the Eighth Conference on Machine Translation",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.wmt-1.25",
doi = "10.18653/v1/2023.wmt-1.25",
pages = "252--262",
abstract = "As part of the WMT-2023 {``}Test suites{''} shared task, in this paper we summarize the results of two test suites evaluations: MuST-SHEWMT23 and INES. By focusing on the en-de and de-en language pairs, we rely on these newly created test suites to investigate systems{'} ability to translate feminine and masculine gender and produce gender-inclusive translations. Furthermore we discuss metrics associated with our test suites and validate them by means of human evaluations. Our results indicate that systems achieve reasonable and comparable performance in correctly translating both feminine and masculine gender forms for naturalistic gender phenomena. Instead, the generation of inclusive language forms in translation emerges as a challenging task for all the evaluated MT models, indicating room for future improvements and research on the topic. We make MuST-SHEWMT23 and INES freely available.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="savoldi-etal-2023-test">
<titleInfo>
<title>Test Suites Task: Evaluation of Gender Fairness in MT with MuST-SHE and INES</title>
</titleInfo>
<name type="personal">
<namePart type="given">Beatrice</namePart>
<namePart type="family">Savoldi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marco</namePart>
<namePart type="family">Gaido</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matteo</namePart>
<namePart type="family">Negri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luisa</namePart>
<namePart type="family">Bentivogli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Eighth Conference on Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Philipp</namePart>
<namePart type="family">Koehn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Barry</namePart>
<namePart type="family">Haddow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tom</namePart>
<namePart type="family">Kocmi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christof</namePart>
<namePart type="family">Monz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>As part of the WMT-2023 “Test suites” shared task, in this paper we summarize the results of two test suites evaluations: MuST-SHEWMT23 and INES. By focusing on the en-de and de-en language pairs, we rely on these newly created test suites to investigate systems’ ability to translate feminine and masculine gender and produce gender-inclusive translations. Furthermore we discuss metrics associated with our test suites and validate them by means of human evaluations. Our results indicate that systems achieve reasonable and comparable performance in correctly translating both feminine and masculine gender forms for naturalistic gender phenomena. Instead, the generation of inclusive language forms in translation emerges as a challenging task for all the evaluated MT models, indicating room for future improvements and research on the topic. We make MuST-SHEWMT23 and INES freely available.</abstract>
<identifier type="citekey">savoldi-etal-2023-test</identifier>
<identifier type="doi">10.18653/v1/2023.wmt-1.25</identifier>
<location>
<url>https://aclanthology.org/2023.wmt-1.25</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>252</start>
<end>262</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Test Suites Task: Evaluation of Gender Fairness in MT with MuST-SHE and INES
%A Savoldi, Beatrice
%A Gaido, Marco
%A Negri, Matteo
%A Bentivogli, Luisa
%Y Koehn, Philipp
%Y Haddow, Barry
%Y Kocmi, Tom
%Y Monz, Christof
%S Proceedings of the Eighth Conference on Machine Translation
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F savoldi-etal-2023-test
%X As part of the WMT-2023 “Test suites” shared task, in this paper we summarize the results of two test suites evaluations: MuST-SHEWMT23 and INES. By focusing on the en-de and de-en language pairs, we rely on these newly created test suites to investigate systems’ ability to translate feminine and masculine gender and produce gender-inclusive translations. Furthermore we discuss metrics associated with our test suites and validate them by means of human evaluations. Our results indicate that systems achieve reasonable and comparable performance in correctly translating both feminine and masculine gender forms for naturalistic gender phenomena. Instead, the generation of inclusive language forms in translation emerges as a challenging task for all the evaluated MT models, indicating room for future improvements and research on the topic. We make MuST-SHEWMT23 and INES freely available.
%R 10.18653/v1/2023.wmt-1.25
%U https://aclanthology.org/2023.wmt-1.25
%U https://doi.org/10.18653/v1/2023.wmt-1.25
%P 252-262
Markdown (Informal)
[Test Suites Task: Evaluation of Gender Fairness in MT with MuST-SHE and INES](https://aclanthology.org/2023.wmt-1.25) (Savoldi et al., WMT 2023)
ACL