@inproceedings{mirzakhalov-etal-2021-evaluating,
title = "Evaluating Multiway Multilingual {NMT} in the {T}urkic Languages",
author = "Mirzakhalov, Jamshidbek and
Babu, Anoop and
Kunafin, Aigiz and
Wahab, Ahsan and
Moydinboyev, Bekhzodbek and
Ivanova, Sardana and
Uzokova, Mokhiyakhon and
Pulatova, Shaxnoza and
Ataman, Duygu and
Kreutzer, Julia and
Tyers, Francis and
Firat, Orhan and
Licato, John and
Chellappan, Sriram",
booktitle = "Proceedings of the Sixth Conference on Machine Translation",
month = nov,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.wmt-1.60",
pages = "518--530",
abstract = "Despite the increasing number of large and comprehensive machine translation (MT) systems, evaluation of these methods in various languages has been restrained by the lack of high-quality parallel corpora as well as engagement with the people that speak these languages. In this study, we present an evaluation of state-of-the-art approaches to training and evaluating MT systems in 22 languages from the Turkic language family, most of which being extremely under-explored. First, we adopt the TIL Corpus with a few key improvements to the training and the evaluation sets. Then, we train 26 bilingual baselines as well as a multi-way neural MT (MNMT) model using the corpus and perform an extensive analysis using automatic metrics as well as human evaluations. We find that the MNMT model outperforms almost all bilingual baselines in the out-of-domain test sets and finetuning the model on a downstream task of a single pair also results in a huge performance boost in both low- and high-resource scenarios. Our attentive analysis of evaluation criteria for MT models in Turkic languages also points to the necessity for further research in this direction. We release the corpus splits, test sets as well as models to the public.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mirzakhalov-etal-2021-evaluating">
<titleInfo>
<title>Evaluating Multiway Multilingual NMT in the Turkic Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jamshidbek</namePart>
<namePart type="family">Mirzakhalov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anoop</namePart>
<namePart type="family">Babu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aigiz</namePart>
<namePart type="family">Kunafin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ahsan</namePart>
<namePart type="family">Wahab</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bekhzodbek</namePart>
<namePart type="family">Moydinboyev</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sardana</namePart>
<namePart type="family">Ivanova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mokhiyakhon</namePart>
<namePart type="family">Uzokova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shaxnoza</namePart>
<namePart type="family">Pulatova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Duygu</namePart>
<namePart type="family">Ataman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Julia</namePart>
<namePart type="family">Kreutzer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Francis</namePart>
<namePart type="family">Tyers</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Orhan</namePart>
<namePart type="family">Firat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">John</namePart>
<namePart type="family">Licato</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sriram</namePart>
<namePart type="family">Chellappan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Sixth Conference on Machine Translation</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Despite the increasing number of large and comprehensive machine translation (MT) systems, evaluation of these methods in various languages has been restrained by the lack of high-quality parallel corpora as well as engagement with the people that speak these languages. In this study, we present an evaluation of state-of-the-art approaches to training and evaluating MT systems in 22 languages from the Turkic language family, most of which being extremely under-explored. First, we adopt the TIL Corpus with a few key improvements to the training and the evaluation sets. Then, we train 26 bilingual baselines as well as a multi-way neural MT (MNMT) model using the corpus and perform an extensive analysis using automatic metrics as well as human evaluations. We find that the MNMT model outperforms almost all bilingual baselines in the out-of-domain test sets and finetuning the model on a downstream task of a single pair also results in a huge performance boost in both low- and high-resource scenarios. Our attentive analysis of evaluation criteria for MT models in Turkic languages also points to the necessity for further research in this direction. We release the corpus splits, test sets as well as models to the public.</abstract>
<identifier type="citekey">mirzakhalov-etal-2021-evaluating</identifier>
<location>
<url>https://aclanthology.org/2021.wmt-1.60</url>
</location>
<part>
<date>2021-11</date>
<extent unit="page">
<start>518</start>
<end>530</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Evaluating Multiway Multilingual NMT in the Turkic Languages
%A Mirzakhalov, Jamshidbek
%A Babu, Anoop
%A Kunafin, Aigiz
%A Wahab, Ahsan
%A Moydinboyev, Bekhzodbek
%A Ivanova, Sardana
%A Uzokova, Mokhiyakhon
%A Pulatova, Shaxnoza
%A Ataman, Duygu
%A Kreutzer, Julia
%A Tyers, Francis
%A Firat, Orhan
%A Licato, John
%A Chellappan, Sriram
%S Proceedings of the Sixth Conference on Machine Translation
%D 2021
%8 November
%I Association for Computational Linguistics
%C Online
%F mirzakhalov-etal-2021-evaluating
%X Despite the increasing number of large and comprehensive machine translation (MT) systems, evaluation of these methods in various languages has been restrained by the lack of high-quality parallel corpora as well as engagement with the people that speak these languages. In this study, we present an evaluation of state-of-the-art approaches to training and evaluating MT systems in 22 languages from the Turkic language family, most of which being extremely under-explored. First, we adopt the TIL Corpus with a few key improvements to the training and the evaluation sets. Then, we train 26 bilingual baselines as well as a multi-way neural MT (MNMT) model using the corpus and perform an extensive analysis using automatic metrics as well as human evaluations. We find that the MNMT model outperforms almost all bilingual baselines in the out-of-domain test sets and finetuning the model on a downstream task of a single pair also results in a huge performance boost in both low- and high-resource scenarios. Our attentive analysis of evaluation criteria for MT models in Turkic languages also points to the necessity for further research in this direction. We release the corpus splits, test sets as well as models to the public.
%U https://aclanthology.org/2021.wmt-1.60
%P 518-530
Markdown (Informal)
[Evaluating Multiway Multilingual NMT in the Turkic Languages](https://aclanthology.org/2021.wmt-1.60) (Mirzakhalov et al., WMT 2021)
ACL
- Jamshidbek Mirzakhalov, Anoop Babu, Aigiz Kunafin, Ahsan Wahab, Bekhzodbek Moydinboyev, Sardana Ivanova, Mokhiyakhon Uzokova, Shaxnoza Pulatova, Duygu Ataman, Julia Kreutzer, Francis Tyers, Orhan Firat, John Licato, and Sriram Chellappan. 2021. Evaluating Multiway Multilingual NMT in the Turkic Languages. In Proceedings of the Sixth Conference on Machine Translation, pages 518–530, Online. Association for Computational Linguistics.