@inproceedings{perez-ortiz-etal-2024-expanding,
title = "Expanding the {FLORES}+ Multilingual Benchmark with Translations for {A}ragonese, Aranese, {A}sturian, and {V}alencian",
author = "Perez-Ortiz, Juan Antonio and
S{\'a}nchez-Mart{\'\i}nez, Felipe and
S{\'a}nchez-Cartagena, V{\'\i}ctor M. and
Espl{\`a}-Gomis, Miquel and
Galiano Jimenez, Aaron and
Oliver, Antoni and
Avent{\'\i}n-Boya, Claudi and
Pardos, Alejandro and
Vald{\'e}s, Cristina and
Sans Socasau, Jus{\`e}p Lo{\'\i}s and
Mart{\'\i}nez, Juan Pablo",
editor = "Haddow, Barry and
Kocmi, Tom and
Koehn, Philipp and
Monz, Christof",
booktitle = "Proceedings of the Ninth Conference on Machine Translation",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.wmt-1.41",
pages = "547--555",
abstract = "In this paper, we describe the process of creating the FLORES+ datasets for several Romance languages spoken in Spain, namely Aragonese, Aranese, Asturian, and Valencian. The Aragonese and Aranese datasets are entirely new additions to the FLORES+ multilingual benchmark. An initial version of the Asturian dataset was already available in FLORES+, and our work focused on a thorough revision. Similarly, FLORES+ included a Catalan dataset, which we adapted to the Valencian variety spoken in the Valencian Community. The development of the Aragonese, Aranese, and revised Asturian FLORES+ datasets was undertaken as part of a WMT24 shared task on translation into low-resource languages of Spain.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="perez-ortiz-etal-2024-expanding">
<titleInfo>
<title>Expanding the FLORES+ Multilingual Benchmark with Translations for Aragonese, Aranese, Asturian, and Valencian</title>
</titleInfo>
<name type="personal">
<namePart type="given">Juan</namePart>
<namePart type="given">Antonio</namePart>
<namePart type="family">Perez-Ortiz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Felipe</namePart>
<namePart type="family">Sánchez-Martínez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Víctor</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Sánchez-Cartagena</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Miquel</namePart>
<namePart type="family">Esplà-Gomis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aaron</namePart>
<namePart type="family">Galiano Jimenez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Antoni</namePart>
<namePart type="family">Oliver</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Claudi</namePart>
<namePart type="family">Aventín-Boya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alejandro</namePart>
<namePart type="family">Pardos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cristina</namePart>
<namePart type="family">Valdés</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jusèp</namePart>
<namePart type="given">Loís</namePart>
<namePart type="family">Sans Socasau</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juan</namePart>
<namePart type="given">Pablo</namePart>
<namePart type="family">Martínez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Ninth Conference on Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Barry</namePart>
<namePart type="family">Haddow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tom</namePart>
<namePart type="family">Kocmi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philipp</namePart>
<namePart type="family">Koehn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christof</namePart>
<namePart type="family">Monz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we describe the process of creating the FLORES+ datasets for several Romance languages spoken in Spain, namely Aragonese, Aranese, Asturian, and Valencian. The Aragonese and Aranese datasets are entirely new additions to the FLORES+ multilingual benchmark. An initial version of the Asturian dataset was already available in FLORES+, and our work focused on a thorough revision. Similarly, FLORES+ included a Catalan dataset, which we adapted to the Valencian variety spoken in the Valencian Community. The development of the Aragonese, Aranese, and revised Asturian FLORES+ datasets was undertaken as part of a WMT24 shared task on translation into low-resource languages of Spain.</abstract>
<identifier type="citekey">perez-ortiz-etal-2024-expanding</identifier>
<location>
<url>https://aclanthology.org/2024.wmt-1.41</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>547</start>
<end>555</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Expanding the FLORES+ Multilingual Benchmark with Translations for Aragonese, Aranese, Asturian, and Valencian
%A Perez-Ortiz, Juan Antonio
%A Sánchez-Martínez, Felipe
%A Sánchez-Cartagena, Víctor M.
%A Esplà-Gomis, Miquel
%A Galiano Jimenez, Aaron
%A Oliver, Antoni
%A Aventín-Boya, Claudi
%A Pardos, Alejandro
%A Valdés, Cristina
%A Sans Socasau, Jusèp Loís
%A Martínez, Juan Pablo
%Y Haddow, Barry
%Y Kocmi, Tom
%Y Koehn, Philipp
%Y Monz, Christof
%S Proceedings of the Ninth Conference on Machine Translation
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F perez-ortiz-etal-2024-expanding
%X In this paper, we describe the process of creating the FLORES+ datasets for several Romance languages spoken in Spain, namely Aragonese, Aranese, Asturian, and Valencian. The Aragonese and Aranese datasets are entirely new additions to the FLORES+ multilingual benchmark. An initial version of the Asturian dataset was already available in FLORES+, and our work focused on a thorough revision. Similarly, FLORES+ included a Catalan dataset, which we adapted to the Valencian variety spoken in the Valencian Community. The development of the Aragonese, Aranese, and revised Asturian FLORES+ datasets was undertaken as part of a WMT24 shared task on translation into low-resource languages of Spain.
%U https://aclanthology.org/2024.wmt-1.41
%P 547-555
Markdown (Informal)
[Expanding the FLORES+ Multilingual Benchmark with Translations for Aragonese, Aranese, Asturian, and Valencian](https://aclanthology.org/2024.wmt-1.41) (Perez-Ortiz et al., WMT 2024)
ACL
- Juan Antonio Perez-Ortiz, Felipe Sánchez-Martínez, Víctor M. Sánchez-Cartagena, Miquel Esplà-Gomis, Aaron Galiano Jimenez, Antoni Oliver, Claudi Aventín-Boya, Alejandro Pardos, Cristina Valdés, Jusèp Loís Sans Socasau, and Juan Pablo Martínez. 2024. Expanding the FLORES+ Multilingual Benchmark with Translations for Aragonese, Aranese, Asturian, and Valencian. In Proceedings of the Ninth Conference on Machine Translation, pages 547–555, Miami, Florida, USA. Association for Computational Linguistics.