@inproceedings{ahmed-etal-2023-enhancing,
  title     = {Enhancing {Spanish}-{Quechua} Machine Translation with Pre-Trained Models and Diverse Data Sources: {LCT-EHU} at {AmericasNLP} Shared Task},
  author    = {Ahmed, Nouman and
               Flechas Manrique, Natalia and
               Petrovi{\'c}, Antonije},
  editor    = {Mager, Manuel and
               Ebrahimi, Abteen and
               Oncevay, Arturo and
               Rice, Enora and
               Rijhwani, Shruti and
               Palmer, Alexis and
               Kann, Katharina},
  booktitle = {Proceedings of the Workshop on Natural Language Processing for Indigenous Languages of the Americas ({AmericasNLP})},
  month     = jul,
  year      = {2023},
  address   = {Toronto, Canada},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2023.americasnlp-1.16},
  doi       = {10.18653/v1/2023.americasnlp-1.16},
  pages     = {156--162},
  abstract  = {We present the LCT-EHU submission to the AmericasNLP 2023 low-resource machine translation shared task. We focus on the Spanish-Quechua language pair and explore the usage of different approaches: (1) Obtain new parallel corpora from the literature and legal domains, (2) Compare a high-resource Spanish-English pre-trained MT model with a Spanish-Finnish pre-trained model (with Finnish being chosen as a target language due to its morphological similarity to Quechua), and (3) Explore additional techniques such as copied corpus and back-translation. Overall, we show that the Spanish-Finnish pre-trained model outperforms other setups, while low-quality synthetic data reduces the performance.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ahmed-etal-2023-enhancing">
<titleInfo>
<title>Enhancing Spanish-Quechua Machine Translation with Pre-Trained Models and Diverse Data Sources: LCT-EHU at AmericasNLP Shared Task</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nouman</namePart>
<namePart type="family">Ahmed</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Natalia</namePart>
<namePart type="family">Flechas Manrique</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Antonije</namePart>
<namePart type="family">Petrović</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Workshop on Natural Language Processing for Indigenous Languages of the Americas (AmericasNLP)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Manuel</namePart>
<namePart type="family">Mager</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abteen</namePart>
<namePart type="family">Ebrahimi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arturo</namePart>
<namePart type="family">Oncevay</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Enora</namePart>
<namePart type="family">Rice</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shruti</namePart>
<namePart type="family">Rijhwani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexis</namePart>
<namePart type="family">Palmer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Katharina</namePart>
<namePart type="family">Kann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present the LCT-EHU submission to the AmericasNLP 2023 low-resource machine translation shared task. We focus on the Spanish-Quechua language pair and explore the usage of different approaches: (1) Obtain new parallel corpora from the literature and legal domains, (2) Compare a high-resource Spanish-English pre-trained MT model with a Spanish-Finnish pre-trained model (with Finnish being chosen as a target language due to its morphological similarity to Quechua), and (3) Explore additional techniques such as copied corpus and back-translation. Overall, we show that the Spanish-Finnish pre-trained model outperforms other setups, while low-quality synthetic data reduces the performance.</abstract>
<identifier type="citekey">ahmed-etal-2023-enhancing</identifier>
<identifier type="doi">10.18653/v1/2023.americasnlp-1.16</identifier>
<location>
<url>https://aclanthology.org/2023.americasnlp-1.16</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>156</start>
<end>162</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Enhancing Spanish-Quechua Machine Translation with Pre-Trained Models and Diverse Data Sources: LCT-EHU at AmericasNLP Shared Task
%A Ahmed, Nouman
%A Flechas Manrique, Natalia
%A Petrović, Antonije
%Y Mager, Manuel
%Y Ebrahimi, Abteen
%Y Oncevay, Arturo
%Y Rice, Enora
%Y Rijhwani, Shruti
%Y Palmer, Alexis
%Y Kann, Katharina
%S Proceedings of the Workshop on Natural Language Processing for Indigenous Languages of the Americas (AmericasNLP)
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F ahmed-etal-2023-enhancing
%X We present the LCT-EHU submission to the AmericasNLP 2023 low-resource machine translation shared task. We focus on the Spanish-Quechua language pair and explore the usage of different approaches: (1) Obtain new parallel corpora from the literature and legal domains, (2) Compare a high-resource Spanish-English pre-trained MT model with a Spanish-Finnish pre-trained model (with Finnish being chosen as a target language due to its morphological similarity to Quechua), and (3) Explore additional techniques such as copied corpus and back-translation. Overall, we show that the Spanish-Finnish pre-trained model outperforms other setups, while low-quality synthetic data reduces the performance.
%R 10.18653/v1/2023.americasnlp-1.16
%U https://aclanthology.org/2023.americasnlp-1.16
%U https://doi.org/10.18653/v1/2023.americasnlp-1.16
%P 156-162
Markdown (Informal)
[Enhancing Spanish-Quechua Machine Translation with Pre-Trained Models and Diverse Data Sources: LCT-EHU at AmericasNLP Shared Task](https://aclanthology.org/2023.americasnlp-1.16) (Ahmed et al., AmericasNLP 2023)
ACL