@inproceedings{appicharla-etal-2021-iitp,
title = "{IITP}-{MT} at {CALCS}2021: {E}nglish to {H}inglish Neural Machine Translation using Unsupervised Synthetic Code-Mixed Parallel Corpus",
author = "Appicharla, Ramakrishna and
Gupta, Kamal Kumar and
Ekbal, Asif and
Bhattacharyya, Pushpak",
editor = "Solorio, Thamar and
Chen, Shuguang and
Black, Alan W. and
Diab, Mona and
Sitaram, Sunayana and
Soto, Victor and
Yilmaz, Emre and
Srinivasan, Anirudh",
booktitle = "Proceedings of the Fifth Workshop on Computational Approaches to Linguistic Code-Switching",
month = jun,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.calcs-1.5",
doi = "10.18653/v1/2021.calcs-1.5",
pages = "31--35",
abstract = "This paper describes the system submitted by IITP-MT team to Computational Approaches to Linguistic Code-Switching (CALCS 2021) shared task on MT for English→Hinglish. We submit a neural machine translation (NMT) system which is trained on the synthetic code-mixed (cm) English-Hinglish parallel corpus. We propose an approach to create code-mixed parallel corpus from a clean parallel corpus in an unsupervised manner. It is an alignment based approach and we do not use any linguistic resources for explicitly marking any token for code-switching. We also train NMT model on the gold corpus provided by the workshop organizers augmented with the generated synthetic code-mixed parallel corpus. The model trained over the generated synthetic cm data achieves 10.09 BLEU points over the given test set.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="appicharla-etal-2021-iitp">
<titleInfo>
<title>IITP-MT at CALCS2021: English to Hinglish Neural Machine Translation using Unsupervised Synthetic Code-Mixed Parallel Corpus</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ramakrishna</namePart>
<namePart type="family">Appicharla</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kamal</namePart>
<namePart type="given">Kumar</namePart>
<namePart type="family">Gupta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asif</namePart>
<namePart type="family">Ekbal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pushpak</namePart>
<namePart type="family">Bhattacharyya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth Workshop on Computational Approaches to Linguistic Code-Switching</title>
</titleInfo>
<name type="personal">
<namePart type="given">Thamar</namePart>
<namePart type="family">Solorio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shuguang</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alan</namePart>
<namePart type="given">W</namePart>
<namePart type="family">Black</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mona</namePart>
<namePart type="family">Diab</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sunayana</namePart>
<namePart type="family">Sitaram</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Victor</namePart>
<namePart type="family">Soto</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Emre</namePart>
<namePart type="family">Yilmaz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anirudh</namePart>
<namePart type="family">Srinivasan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper describes the system submitted by IITP-MT team to Computational Approaches to Linguistic Code-Switching (CALCS 2021) shared task on MT for English→Hinglish. We submit a neural machine translation (NMT) system which is trained on the synthetic code-mixed (cm) English-Hinglish parallel corpus. We propose an approach to create code-mixed parallel corpus from a clean parallel corpus in an unsupervised manner. It is an alignment based approach and we do not use any linguistic resources for explicitly marking any token for code-switching. We also train NMT model on the gold corpus provided by the workshop organizers augmented with the generated synthetic code-mixed parallel corpus. The model trained over the generated synthetic cm data achieves 10.09 BLEU points over the given test set.</abstract>
<identifier type="citekey">appicharla-etal-2021-iitp</identifier>
<identifier type="doi">10.18653/v1/2021.calcs-1.5</identifier>
<location>
<url>https://aclanthology.org/2021.calcs-1.5</url>
</location>
<part>
<date>2021-06</date>
<extent unit="page">
<start>31</start>
<end>35</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T IITP-MT at CALCS2021: English to Hinglish Neural Machine Translation using Unsupervised Synthetic Code-Mixed Parallel Corpus
%A Appicharla, Ramakrishna
%A Gupta, Kamal Kumar
%A Ekbal, Asif
%A Bhattacharyya, Pushpak
%Y Solorio, Thamar
%Y Chen, Shuguang
%Y Black, Alan W.
%Y Diab, Mona
%Y Sitaram, Sunayana
%Y Soto, Victor
%Y Yilmaz, Emre
%Y Srinivasan, Anirudh
%S Proceedings of the Fifth Workshop on Computational Approaches to Linguistic Code-Switching
%D 2021
%8 June
%I Association for Computational Linguistics
%C Online
%F appicharla-etal-2021-iitp
%X This paper describes the system submitted by IITP-MT team to Computational Approaches to Linguistic Code-Switching (CALCS 2021) shared task on MT for English→Hinglish. We submit a neural machine translation (NMT) system which is trained on the synthetic code-mixed (cm) English-Hinglish parallel corpus. We propose an approach to create code-mixed parallel corpus from a clean parallel corpus in an unsupervised manner. It is an alignment based approach and we do not use any linguistic resources for explicitly marking any token for code-switching. We also train NMT model on the gold corpus provided by the workshop organizers augmented with the generated synthetic code-mixed parallel corpus. The model trained over the generated synthetic cm data achieves 10.09 BLEU points over the given test set.
%R 10.18653/v1/2021.calcs-1.5
%U https://aclanthology.org/2021.calcs-1.5
%U https://doi.org/10.18653/v1/2021.calcs-1.5
%P 31-35
Markdown (Informal)
[IITP-MT at CALCS2021: English to Hinglish Neural Machine Translation using Unsupervised Synthetic Code-Mixed Parallel Corpus](https://aclanthology.org/2021.calcs-1.5) (Appicharla et al., CALCS 2021)
ACL