@inproceedings{lavinia-etal-2023-bidirectional,
  title     = {Bidirectional Neural Machine Translation ({NMT}) using Monolingual Data for {Khasi}-{English} Pair},
  author    = {Lavinia, Nongbri and
               Gourashyam, Moirangthem and
               Samarendra, Salam and
               Kishorjit, Nongmeikapam},
  editor    = {Jyoti, D. Pawar and
               Sobha, Lalitha Devi},
  booktitle = {Proceedings of the 20th International Conference on Natural Language Processing (ICON)},
  month     = dec,
  year      = {2023},
  address   = {Goa University, Goa, India},
  publisher = {NLP Association of India (NLPAI)},
  url       = {https://aclanthology.org/2023.icon-1.24},
  pages     = {318--325},
  abstract  = {Due to a lack of parallel data, low-resource language machine translation has been unable to make the most of Neural Machine Translation. This paper investigates several approaches as to how low-resource Neural Machine Translation can be improved in a strictly low-resource setting, especially for bidirectional Khasi-English language pairs. The back-translation method is used to expand the parallel corpus using monolingual data. The work also experimented with subword tokenizers to improve the translation accuracy for new and rare words. Transformer, a cutting-edge NMT model, serves as the backbone of the bidirectional Khasi-English machine translation. The final Khasi-to-English and English-to-Khasi NMT models trained using both authentic and synthetic parallel corpora show an increase of 2.34 and 3.1 BLEU scores, respectively, when compared to the models trained using only authentic parallel dataset.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="lavinia-etal-2023-bidirectional">
<titleInfo>
<title>Bidirectional Neural Machine Translation (NMT) using Monolingual Data for Khasi-English Pair</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nongbri</namePart>
<namePart type="family">Lavinia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Moirangthem</namePart>
<namePart type="family">Gourashyam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Salam</namePart>
<namePart type="family">Samarendra</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nongmeikapam</namePart>
<namePart type="family">Kishorjit</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 20th International Conference on Natural Language Processing (ICON)</title>
</titleInfo>
<name type="personal">
<namePart type="given">D</namePart>
<namePart type="given">Pawar</namePart>
<namePart type="family">Jyoti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lalitha</namePart>
<namePart type="given">Devi</namePart>
<namePart type="family">Sobha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>NLP Association of India (NLPAI)</publisher>
<place>
<placeTerm type="text">Goa University, Goa, India</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Due to a lack of parallel data, low-resource language machine translation has been unable to make the most of Neural Machine Translation. This paper investigates several approaches as to how low-resource Neural Machine Translation can be improved in a strictly low-resource setting, especially for bidirectional Khasi-English language pairs. The back-translation method is used to expand the parallel corpus using monolingual data. The work also experimented with subword tokenizers to improve the translation accuracy for new and rare words. Transformer, a cutting-edge NMT model, serves as the backbone of the bidirectional Khasi-English machine translation. The final Khasi-to-English and English-to-Khasi NMT models trained using both authentic and synthetic parallel corpora show an increase of 2.34 and 3.1 BLEU scores, respectively, when compared to the models trained using only authentic parallel dataset.</abstract>
<identifier type="citekey">lavinia-etal-2023-bidirectional</identifier>
<location>
<url>https://aclanthology.org/2023.icon-1.24</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>318</start>
<end>325</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Bidirectional Neural Machine Translation (NMT) using Monolingual Data for Khasi-English Pair
%A Lavinia, Nongbri
%A Gourashyam, Moirangthem
%A Samarendra, Salam
%A Kishorjit, Nongmeikapam
%Y Jyoti, D. Pawar
%Y Sobha, Lalitha Devi
%S Proceedings of the 20th International Conference on Natural Language Processing (ICON)
%D 2023
%8 December
%I NLP Association of India (NLPAI)
%C Goa University, Goa, India
%F lavinia-etal-2023-bidirectional
%X Due to a lack of parallel data, low-resource language machine translation has been unable to make the most of Neural Machine Translation. This paper investigates several approaches as to how low-resource Neural Machine Translation can be improved in a strictly low-resource setting, especially for bidirectional Khasi-English language pairs. The back-translation method is used to expand the parallel corpus using monolingual data. The work also experimented with subword tokenizers to improve the translation accuracy for new and rare words. Transformer, a cutting-edge NMT model, serves as the backbone of the bidirectional Khasi-English machine translation. The final Khasi-to-English and English-to-Khasi NMT models trained using both authentic and synthetic parallel corpora show an increase of 2.34 and 3.1 BLEU scores, respectively, when compared to the models trained using only authentic parallel dataset.
%U https://aclanthology.org/2023.icon-1.24
%P 318-325
Markdown (Informal)
[Bidirectional Neural Machine Translation (NMT) using Monolingual Data for Khasi-English Pair](https://aclanthology.org/2023.icon-1.24) (Lavinia et al., ICON 2023)
ACL