@inproceedings{banerjee-etal-2021-neural,
title = "Neural Machine Translation in Low-Resource Setting: a Case Study in {E}nglish-{M}arathi Pair",
author = "Banerjee, Aakash and
Jain, Aditya and
Mhaskar, Shivam and
Dattatray Deoghare, Sourabh and
Sehgal, Aman and
Bhattacharya, Pushpak",
editor = "Duh, Kevin and
Guzm{\'a}n, Francisco",
booktitle = "Proceedings of Machine Translation Summit XVIII: Research Track",
month = aug,
year = "2021",
address = "Virtual",
publisher = "Association for Machine Translation in the Americas",
url = "https://aclanthology.org/2021.mtsummit-research.4",
pages = "35--47",
abstract = "In this paper and we explore different techniques of overcoming the challenges of low-resource in Neural Machine Translation (NMT) and specifically focusing on the case of English-Marathi NMT. NMT systems require a large amount of parallel corpora to obtain good quality translations. We try to mitigate the low-resource problem by augmenting parallel corpora or by using transfer learning. Techniques such as Phrase Table Injection (PTI) and back-translation and mixing of language corpora are used for enhancing the parallel data; whereas pivoting and multilingual embeddings are used to leverage transfer learning. For pivoting and Hindi comes in as assisting language for English-Marathi translation. Compared to baseline transformer model and a significant improvement trend in BLEU score is observed across various techniques. We have done extensive manual and automatic and qualitative evaluation of our systems. Since the trend in Machine Translation (MT) today is post-editing and measuring of Human Effort Reduction (HER) and we have given our preliminary observations on Translation Edit Rate (TER) vs. BLEU score study and where TER is regarded as a measure of HER.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="banerjee-etal-2021-neural">
<titleInfo>
<title>Neural Machine Translation in Low-Resource Setting: a Case Study in English-Marathi Pair</title>
</titleInfo>
<name type="personal">
<namePart type="given">Aakash</namePart>
<namePart type="family">Banerjee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aditya</namePart>
<namePart type="family">Jain</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shivam</namePart>
<namePart type="family">Mhaskar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sourabh</namePart>
<namePart type="family">Dattatray Deoghare</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aman</namePart>
<namePart type="family">Sehgal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pushpak</namePart>
<namePart type="family">Bhattacharya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of Machine Translation Summit XVIII: Research Track</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kevin</namePart>
<namePart type="family">Duh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Francisco</namePart>
<namePart type="family">Guzmán</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Machine Translation in the Americas</publisher>
<place>
<placeTerm type="text">Virtual</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper and we explore different techniques of overcoming the challenges of low-resource in Neural Machine Translation (NMT) and specifically focusing on the case of English-Marathi NMT. NMT systems require a large amount of parallel corpora to obtain good quality translations. We try to mitigate the low-resource problem by augmenting parallel corpora or by using transfer learning. Techniques such as Phrase Table Injection (PTI) and back-translation and mixing of language corpora are used for enhancing the parallel data; whereas pivoting and multilingual embeddings are used to leverage transfer learning. For pivoting and Hindi comes in as assisting language for English-Marathi translation. Compared to baseline transformer model and a significant improvement trend in BLEU score is observed across various techniques. We have done extensive manual and automatic and qualitative evaluation of our systems. Since the trend in Machine Translation (MT) today is post-editing and measuring of Human Effort Reduction (HER) and we have given our preliminary observations on Translation Edit Rate (TER) vs. BLEU score study and where TER is regarded as a measure of HER.</abstract>
<identifier type="citekey">banerjee-etal-2021-neural</identifier>
<location>
<url>https://aclanthology.org/2021.mtsummit-research.4</url>
</location>
<part>
<date>2021-08</date>
<extent unit="page">
<start>35</start>
<end>47</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Neural Machine Translation in Low-Resource Setting: a Case Study in English-Marathi Pair
%A Banerjee, Aakash
%A Jain, Aditya
%A Mhaskar, Shivam
%A Dattatray Deoghare, Sourabh
%A Sehgal, Aman
%A Bhattacharya, Pushpak
%Y Duh, Kevin
%Y Guzmán, Francisco
%S Proceedings of Machine Translation Summit XVIII: Research Track
%D 2021
%8 August
%I Association for Machine Translation in the Americas
%C Virtual
%F banerjee-etal-2021-neural
%X In this paper and we explore different techniques of overcoming the challenges of low-resource in Neural Machine Translation (NMT) and specifically focusing on the case of English-Marathi NMT. NMT systems require a large amount of parallel corpora to obtain good quality translations. We try to mitigate the low-resource problem by augmenting parallel corpora or by using transfer learning. Techniques such as Phrase Table Injection (PTI) and back-translation and mixing of language corpora are used for enhancing the parallel data; whereas pivoting and multilingual embeddings are used to leverage transfer learning. For pivoting and Hindi comes in as assisting language for English-Marathi translation. Compared to baseline transformer model and a significant improvement trend in BLEU score is observed across various techniques. We have done extensive manual and automatic and qualitative evaluation of our systems. Since the trend in Machine Translation (MT) today is post-editing and measuring of Human Effort Reduction (HER) and we have given our preliminary observations on Translation Edit Rate (TER) vs. BLEU score study and where TER is regarded as a measure of HER.
%U https://aclanthology.org/2021.mtsummit-research.4
%P 35-47
Markdown (Informal)
[Neural Machine Translation in Low-Resource Setting: a Case Study in English-Marathi Pair](https://aclanthology.org/2021.mtsummit-research.4) (Banerjee et al., MTSummit 2021)
ACL