@inproceedings{das-etal-2022-nit,
title = "{NIT} Rourkela Machine Translation({MT}) System Submission to {WAT} 2022 for {M}ulti{I}ndic{MT}: An Indic Language Multilingual Shared Task",
author = "Das, Sudhansu Bala and
Biradar, Atharv and
Mishra, Tapas Kumar and
Patra, Bidyut Kumar",
booktitle = "Proceedings of the 9th Workshop on Asian Translation",
month = oct,
year = "2022",
address = "Gyeongju, Republic of Korea",
publisher = "International Conference on Computational Linguistics",
url = "https://aclanthology.org/2022.wat-1.8",
pages = "73--77",
abstract = "Multilingual Neural Machine Translation (MNMT) exhibits incredible performance with the development of a single translation model for many languages. Previous studies on multilingual translation reveal that multilingual training is effective for languages with limited corpus. This paper presents our submission (Team Id: NITR) in the WAT 2022 for {``}MultiIndicMT shared task{''} where the objective of the task is the translation between 5 Indic languages from OPUS Corpus (which are newly added in WAT 2022 corpus) into English and vice versa using the corpus provided by the organizer of WAT. Our system is based on a transformer-based NMT using fairseq modelling toolkit with ensemble techniques. Heuristic pre-processing approaches are carried out before keeping the model under training. Our multilingual NMT systems are trained with shared encoder and decoder parameters followed by assigning language embeddings to each token in both encoder and decoder. Our final multilingual system was examined by using BLEU and RIBES metrics scores. In future, we look forward to extend our research that will help in fine-tuning of both encoder and decoder during the monolingual unsupervised training in order to improve the quality of the synthetic data generated during the process.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="das-etal-2022-nit">
<titleInfo>
<title>NIT Rourkela Machine Translation(MT) System Submission to WAT 2022 for MultiIndicMT: An Indic Language Multilingual Shared Task</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sudhansu</namePart>
<namePart type="given">Bala</namePart>
<namePart type="family">Das</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Atharv</namePart>
<namePart type="family">Biradar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tapas</namePart>
<namePart type="given">Kumar</namePart>
<namePart type="family">Mishra</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bidyut</namePart>
<namePart type="given">Kumar</namePart>
<namePart type="family">Patra</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-10</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 9th Workshop on Asian Translation</title>
</titleInfo>
<originInfo>
<publisher>International Conference on Computational Linguistics</publisher>
<place>
<placeTerm type="text">Gyeongju, Republic of Korea</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Multilingual Neural Machine Translation (MNMT) exhibits incredible performance with the development of a single translation model for many languages. Previous studies on multilingual translation reveal that multilingual training is effective for languages with limited corpus. This paper presents our submission (Team Id: NITR) in the WAT 2022 for “MultiIndicMT shared task” where the objective of the task is the translation between 5 Indic languages from OPUS Corpus (which are newly added in WAT 2022 corpus) into English and vice versa using the corpus provided by the organizer of WAT. Our system is based on a transformer-based NMT using fairseq modelling toolkit with ensemble techniques. Heuristic pre-processing approaches are carried out before keeping the model under training. Our multilingual NMT systems are trained with shared encoder and decoder parameters followed by assigning language embeddings to each token in both encoder and decoder. Our final multilingual system was examined by using BLEU and RIBES metrics scores. In future, we look forward to extend our research that will help in fine-tuning of both encoder and decoder during the monolingual unsupervised training in order to improve the quality of the synthetic data generated during the process.</abstract>
<identifier type="citekey">das-etal-2022-nit</identifier>
<location>
<url>https://aclanthology.org/2022.wat-1.8</url>
</location>
<part>
<date>2022-10</date>
<extent unit="page">
<start>73</start>
<end>77</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T NIT Rourkela Machine Translation(MT) System Submission to WAT 2022 for MultiIndicMT: An Indic Language Multilingual Shared Task
%A Das, Sudhansu Bala
%A Biradar, Atharv
%A Mishra, Tapas Kumar
%A Patra, Bidyut Kumar
%S Proceedings of the 9th Workshop on Asian Translation
%D 2022
%8 October
%I International Conference on Computational Linguistics
%C Gyeongju, Republic of Korea
%F das-etal-2022-nit
%X Multilingual Neural Machine Translation (MNMT) exhibits incredible performance with the development of a single translation model for many languages. Previous studies on multilingual translation reveal that multilingual training is effective for languages with limited corpus. This paper presents our submission (Team Id: NITR) in the WAT 2022 for “MultiIndicMT shared task” where the objective of the task is the translation between 5 Indic languages from OPUS Corpus (which are newly added in WAT 2022 corpus) into English and vice versa using the corpus provided by the organizer of WAT. Our system is based on a transformer-based NMT using fairseq modelling toolkit with ensemble techniques. Heuristic pre-processing approaches are carried out before keeping the model under training. Our multilingual NMT systems are trained with shared encoder and decoder parameters followed by assigning language embeddings to each token in both encoder and decoder. Our final multilingual system was examined by using BLEU and RIBES metrics scores. In future, we look forward to extend our research that will help in fine-tuning of both encoder and decoder during the monolingual unsupervised training in order to improve the quality of the synthetic data generated during the process.
%U https://aclanthology.org/2022.wat-1.8
%P 73-77
Markdown (Informal)
[NIT Rourkela Machine Translation(MT) System Submission to WAT 2022 for MultiIndicMT: An Indic Language Multilingual Shared Task](https://aclanthology.org/2022.wat-1.8) (Das et al., WAT 2022)
ACL