@inproceedings{vavre-etal-2022-adapting,
title = "Adapting Multilingual Models for Code-Mixed Translation",
author = "Vavre, Aditya and
Gupta, Abhirut and
Sarawagi, Sunita",
editor = "Goldberg, Yoav and
Kozareva, Zornitsa and
Zhang, Yue",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2022",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.findings-emnlp.528",
doi = "10.18653/v1/2022.findings-emnlp.528",
pages = "7133--7141",
abstract = "The scarcity of gold standard code-mixed to pure language parallel data makes it difficult to train translation models reliably.Prior work has addressed the paucity of parallel data with data augmentation techniques.Such methods rely heavily on external resources making systems difficult to train and scale effectively for multiple languages.We present a simple yet highly effective two-stage back-translation based training scheme for adapting multilingual models to the task of code-mixed translation which eliminates dependence on external resources.We show a substantial improvement in translation quality (measured through BLEU), beating existing prior work by up to +3.8 BLEU on code-mixed Hi$\rightarrow$En, Mr$\rightarrow$En, and Bn$\rightarrow$En tasks. On the LinCE Machine Translation leader board, we achieve the highest score for code-mixed Es$\rightarrow$En, beating existing best baseline by +6.5 BLEU, and our own stronger baseline by +1.1 BLEU.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="vavre-etal-2022-adapting">
<titleInfo>
<title>Adapting Multilingual Models for Code-Mixed Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Aditya</namePart>
<namePart type="family">Vavre</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abhirut</namePart>
<namePart type="family">Gupta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sunita</namePart>
<namePart type="family">Sarawagi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2022</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yoav</namePart>
<namePart type="family">Goldberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zornitsa</namePart>
<namePart type="family">Kozareva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yue</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, United Arab Emirates</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The scarcity of gold standard code-mixed to pure language parallel data makes it difficult to train translation models reliably. Prior work has addressed the paucity of parallel data with data augmentation techniques. Such methods rely heavily on external resources making systems difficult to train and scale effectively for multiple languages. We present a simple yet highly effective two-stage back-translation based training scheme for adapting multilingual models to the task of code-mixed translation which eliminates dependence on external resources. We show a substantial improvement in translation quality (measured through BLEU), beating existing prior work by up to +3.8 BLEU on code-mixed Hi→En, Mr→En, and Bn→En tasks. On the LinCE Machine Translation leader board, we achieve the highest score for code-mixed Es→En, beating existing best baseline by +6.5 BLEU, and our own stronger baseline by +1.1 BLEU.</abstract>
<identifier type="citekey">vavre-etal-2022-adapting</identifier>
<identifier type="doi">10.18653/v1/2022.findings-emnlp.528</identifier>
<location>
<url>https://aclanthology.org/2022.findings-emnlp.528</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>7133</start>
<end>7141</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Adapting Multilingual Models for Code-Mixed Translation
%A Vavre, Aditya
%A Gupta, Abhirut
%A Sarawagi, Sunita
%Y Goldberg, Yoav
%Y Kozareva, Zornitsa
%Y Zhang, Yue
%S Findings of the Association for Computational Linguistics: EMNLP 2022
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates
%F vavre-etal-2022-adapting
%X The scarcity of gold standard code-mixed to pure language parallel data makes it difficult to train translation models reliably. Prior work has addressed the paucity of parallel data with data augmentation techniques. Such methods rely heavily on external resources making systems difficult to train and scale effectively for multiple languages. We present a simple yet highly effective two-stage back-translation based training scheme for adapting multilingual models to the task of code-mixed translation which eliminates dependence on external resources. We show a substantial improvement in translation quality (measured through BLEU), beating existing prior work by up to +3.8 BLEU on code-mixed Hi→En, Mr→En, and Bn→En tasks. On the LinCE Machine Translation leader board, we achieve the highest score for code-mixed Es→En, beating existing best baseline by +6.5 BLEU, and our own stronger baseline by +1.1 BLEU.
%R 10.18653/v1/2022.findings-emnlp.528
%U https://aclanthology.org/2022.findings-emnlp.528
%U https://doi.org/10.18653/v1/2022.findings-emnlp.528
%P 7133-7141
Markdown (Informal)
[Adapting Multilingual Models for Code-Mixed Translation](https://aclanthology.org/2022.findings-emnlp.528) (Vavre et al., Findings 2022)
ACL
Aditya Vavre, Abhirut Gupta, and Sunita Sarawagi. 2022. Adapting Multilingual Models for Code-Mixed Translation. In Findings of the Association for Computational Linguistics: EMNLP 2022, pages 7133–7141, Abu Dhabi, United Arab Emirates. Association for Computational Linguistics.