% Conference paper in the WMT 2022 proceedings (see the url field for the ACL Anthology record).
% Title braces wrap whole case-sensitive words so sentence-casing styles keep them intact.
@inproceedings{hegde-lakshmaiah-2022-mucs,
    title     = {{MUCS}@{MixMT}: {IndicTrans}-based Machine Translation for {Hinglish} Text},
    author    = {Hegde, Asha and
                 Lakshmaiah, Shashirekha},
    editor    = {Koehn, Philipp and
                 Barrault, Lo{\"\i}c and
                 Bojar, Ond{\v{r}}ej and
                 Bougares, Fethi and
                 Chatterjee, Rajen and
                 Costa-juss{\`a}, Marta R. and
                 Federmann, Christian and
                 Fishel, Mark and
                 Fraser, Alexander and
                 Freitag, Markus and
                 Graham, Yvette and
                 Grundkiewicz, Roman and
                 Guzman, Paco and
                 Haddow, Barry and
                 Huck, Matthias and
                 Jimeno Yepes, Antonio and
                 Kocmi, Tom and
                 Martins, Andr{\'e} and
                 Morishita, Makoto and
                 Monz, Christof and
                 Nagata, Masaaki and
                 Nakazawa, Toshiaki and
                 Negri, Matteo and
                 N{\'e}v{\'e}ol, Aur{\'e}lie and
                 Neves, Mariana and
                 Popel, Martin and
                 Turchi, Marco and
                 Zampieri, Marcos},
    booktitle = {Proceedings of the Seventh Conference on Machine Translation (WMT)},
    month     = dec,
    year      = {2022},
    address   = {Abu Dhabi, United Arab Emirates (Hybrid)},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2022.wmt-1.113},
    pages     = {1131--1135},
    abstract  = {Code-mixing is the phenomena of mixing various linguistic units such as paragraphs, sentences, phrases, words, etc., of one language with that of the other language in any text. This code-mixing is predominantly used by social media users who know more than one language. Processing code-mixed text is challenging because of its characteristics and lack of tools that supports such data. Further, pretrained models can be used for the formal text and not for the informal text such as code-mixed. Developing efficient Machine Translation (MT) systems for code-mixed text is challenging due to lack of code-mixed training data. Further, existing MT systems developed to translate monolingual data are not portable to translate code-mixed text mainly due to its informal nature. To address the MT challenges of code-mixed text, this paper describes the proposed MT models submitted by our team MUCS, to the Code-mixed Machine Translation (MixMT) shared task in the Workshop on Machine Translation (WMT) organized in connection with Empirical models in Natural Language Processing (EMNLP) 2022. This shared has two subtasks: i) subtask 1 - to translate English sentences and their corresponding Hindi translations into Hinglish text and ii) subtask 2 - to translate Hinglish text into English text. The proposed models that translate the code-mixed English text to Hinglish (English-Hindli code-mixed text) and vice-versa, comprises of i) transliterating Hinglish text from Latin to Devanagari script and vice-versa, ii) pseudo translation generation using existing models, and iii) efficient target generation by combining the pseudo translations along with the training data provided by the shared task organizers. The proposed models obtained $5^{th}$ and $3^{rd}$ rank with Recall-Oriented Under-study for Gisting Evaluation (ROUGE) scores of 0.35806 and 0.55453 for subtask 1 and subtask 2 respectively.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS record for citekey hegde-lakshmaiah-2022-mucs: the paper's own metadata first, then the host proceedings (title, editors, publisher) inside relatedItem. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="hegde-lakshmaiah-2022-mucs">
    <titleInfo>
      <title>MUCS@MixMT: IndicTrans-based Machine Translation for Hinglish Text</title>
    </titleInfo>
    <!-- Paper authors -->
    <name type="personal">
      <namePart type="given">Asha</namePart>
      <namePart type="family">Hegde</namePart>
      <role><roleTerm authority="marcrelator" type="text">author</roleTerm></role>
    </name>
    <name type="personal">
      <namePart type="given">Shashirekha</namePart>
      <namePart type="family">Lakshmaiah</namePart>
      <role><roleTerm authority="marcrelator" type="text">author</roleTerm></role>
    </name>
    <originInfo>
      <dateIssued>2022-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume and its editors -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Seventh Conference on Machine Translation (WMT)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Philipp</namePart>
        <namePart type="family">Koehn</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Loïc</namePart>
        <namePart type="family">Barrault</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Ondřej</namePart>
        <namePart type="family">Bojar</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Fethi</namePart>
        <namePart type="family">Bougares</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Rajen</namePart>
        <namePart type="family">Chatterjee</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Marta</namePart>
        <namePart type="given">R</namePart>
        <namePart type="family">Costa-jussà</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Christian</namePart>
        <namePart type="family">Federmann</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Mark</namePart>
        <namePart type="family">Fishel</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Alexander</namePart>
        <namePart type="family">Fraser</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Markus</namePart>
        <namePart type="family">Freitag</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Yvette</namePart>
        <namePart type="family">Graham</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Roman</namePart>
        <namePart type="family">Grundkiewicz</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Paco</namePart>
        <namePart type="family">Guzman</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Barry</namePart>
        <namePart type="family">Haddow</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Matthias</namePart>
        <namePart type="family">Huck</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Antonio</namePart>
        <namePart type="family">Jimeno Yepes</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Tom</namePart>
        <namePart type="family">Kocmi</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">André</namePart>
        <namePart type="family">Martins</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Makoto</namePart>
        <namePart type="family">Morishita</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Christof</namePart>
        <namePart type="family">Monz</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Masaaki</namePart>
        <namePart type="family">Nagata</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Toshiaki</namePart>
        <namePart type="family">Nakazawa</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Matteo</namePart>
        <namePart type="family">Negri</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Aurélie</namePart>
        <namePart type="family">Névéol</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Mariana</namePart>
        <namePart type="family">Neves</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Martin</namePart>
        <namePart type="family">Popel</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Marco</namePart>
        <namePart type="family">Turchi</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Marcos</namePart>
        <namePart type="family">Zampieri</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Abu Dhabi, United Arab Emirates (Hybrid)</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Code-mixing is the phenomena of mixing various linguistic units such as paragraphs, sentences, phrases, words, etc., of one language with that of the other language in any text. This code-mixing is predominantly used by social media users who know more than one language. Processing code-mixed text is challenging because of its characteristics and lack of tools that supports such data. Further, pretrained models can be used for the formal text and not for the informal text such as code-mixed. Developing efficient Machine Translation (MT) systems for code-mixed text is challenging due to lack of code-mixed training data. Further, existing MT systems developed to translate monolingual data are not portable to translate code-mixed text mainly due to its informal nature. To address the MT challenges of code-mixed text, this paper describes the proposed MT models submitted by our team MUCS, to the Code-mixed Machine Translation (MixMT) shared task in the Workshop on Machine Translation (WMT) organized in connection with Empirical models in Natural Language Processing (EMNLP) 2022. This shared has two subtasks: i) subtask 1 - to translate English sentences and their corresponding Hindi translations into Hinglish text and ii) subtask 2 - to translate Hinglish text into English text. The proposed models that translate the code-mixed English text to Hinglish (English-Hindli code-mixed text) and vice-versa, comprises of i) transliterating Hinglish text from Latin to Devanagari script and vice-versa, ii) pseudo translation generation using existing models, and iii) efficient target generation by combining the pseudo translations along with the training data provided by the shared task organizers. The proposed models obtained 5^th and 3^rd rank with Recall-Oriented Under-study for Gisting Evaluation (ROUGE) scores of 0.35806 and 0.55453 for subtask 1 and subtask 2 respectively.</abstract>
    <identifier type="citekey">hegde-lakshmaiah-2022-mucs</identifier>
    <location>
      <url>https://aclanthology.org/2022.wmt-1.113</url>
    </location>
    <part>
      <date>2022-12</date>
      <extent unit="page">
        <start>1131</start>
        <end>1135</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T MUCS@MixMT: IndicTrans-based Machine Translation for Hinglish Text
%A Hegde, Asha
%A Lakshmaiah, Shashirekha
%Y Koehn, Philipp
%Y Barrault, Loïc
%Y Bojar, Ondřej
%Y Bougares, Fethi
%Y Chatterjee, Rajen
%Y Costa-jussà, Marta R.
%Y Federmann, Christian
%Y Fishel, Mark
%Y Fraser, Alexander
%Y Freitag, Markus
%Y Graham, Yvette
%Y Grundkiewicz, Roman
%Y Guzman, Paco
%Y Haddow, Barry
%Y Huck, Matthias
%Y Jimeno Yepes, Antonio
%Y Kocmi, Tom
%Y Martins, André
%Y Morishita, Makoto
%Y Monz, Christof
%Y Nagata, Masaaki
%Y Nakazawa, Toshiaki
%Y Negri, Matteo
%Y Névéol, Aurélie
%Y Neves, Mariana
%Y Popel, Martin
%Y Turchi, Marco
%Y Zampieri, Marcos
%S Proceedings of the Seventh Conference on Machine Translation (WMT)
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates (Hybrid)
%F hegde-lakshmaiah-2022-mucs
%X Code-mixing is the phenomena of mixing various linguistic units such as paragraphs, sentences, phrases, words, etc., of one language with that of the other language in any text. This code-mixing is predominantly used by social media users who know more than one language. Processing code-mixed text is challenging because of its characteristics and lack of tools that supports such data. Further, pretrained models can be used for the formal text and not for the informal text such as code-mixed. Developing efficient Machine Translation (MT) systems for code-mixed text is challenging due to lack of code-mixed training data. Further, existing MT systems developed to translate monolingual data are not portable to translate code-mixed text mainly due to its informal nature. To address the MT challenges of code-mixed text, this paper describes the proposed MT models submitted by our team MUCS, to the Code-mixed Machine Translation (MixMT) shared task in the Workshop on Machine Translation (WMT) organized in connection with Empirical models in Natural Language Processing (EMNLP) 2022. This shared has two subtasks: i) subtask 1 - to translate English sentences and their corresponding Hindi translations into Hinglish text and ii) subtask 2 - to translate Hinglish text into English text. The proposed models that translate the code-mixed English text to Hinglish (English-Hindli code-mixed text) and vice-versa, comprises of i) transliterating Hinglish text from Latin to Devanagari script and vice-versa, ii) pseudo translation generation using existing models, and iii) efficient target generation by combining the pseudo translations along with the training data provided by the shared task organizers. The proposed models obtained 5^th and 3^rd rank with Recall-Oriented Under-study for Gisting Evaluation (ROUGE) scores of 0.35806 and 0.55453 for subtask 1 and subtask 2 respectively.
%U https://aclanthology.org/2022.wmt-1.113
%P 1131-1135
Markdown (Informal)
[MUCS@MixMT: IndicTrans-based Machine Translation for Hinglish Text](https://aclanthology.org/2022.wmt-1.113) (Hegde & Lakshmaiah, WMT 2022)
ACL