@inproceedings{soni-bhattacharyya-2024-romantra,
  title     = {{RoMantra}: Optimizing Neural Machine Translation for Low-Resource Languages through {Romanization}},
  author    = {Soni, Govind and
               Bhattacharyya, Pushpak},
  editor    = {Lalitha Devi, Sobha and
               Arora, Karunesh},
  booktitle = {Proceedings of the 21st International Conference on Natural Language Processing ({ICON})},
  month     = dec,
  year      = {2024},
  address   = {AU-KBC Research Centre, Chennai, India},
  publisher = {NLP Association of India (NLPAI)},
  url       = {https://aclanthology.org/2024.icon-1.18/},
  pages     = {157--168},
  abstract  = {Neural Machine Translation (NMT) for low-resource language pairs with distinct scripts, such as Hindi-Chinese and Japanese-Hindi, poses significant challenges due to scriptural and linguistic differences. This paper investigates the efficacy of romanization as a preprocessing step to bridge these gaps. We compare baseline models trained on native scripts with models incorporating romanization in three configurations: both-side, source-side only, and target-side only. Additionally, we introduce a script restoration model that converts romanized output back to native scripts, ensuring accurate evaluation. Our experiments show that romanization, particularly when applied to both sides, improves translation quality across the studied language pairs. The script restoration model further enhances the practicality of this approach by enabling evaluation in native scripts with some performance loss. This work provides insights into leveraging romanization for NMT in low-resource, cross-script settings, presenting a promising direction for under-researched language combinations.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="soni-bhattacharyya-2024-romantra">
    <titleInfo>
      <title>RoMantra: Optimizing Neural Machine Translation for Low-Resource Languages through Romanization</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Govind</namePart>
      <namePart type="family">Soni</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Pushpak</namePart>
      <namePart type="family">Bhattacharyya</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 21st International Conference on Natural Language Processing (ICON)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Sobha</namePart>
        <namePart type="family">Lalitha Devi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Karunesh</namePart>
        <namePart type="family">Arora</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>NLP Association of India (NLPAI)</publisher>
        <place>
          <placeTerm type="text">AU-KBC Research Centre, Chennai, India</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Neural Machine Translation (NMT) for low-resource language pairs with distinct scripts, such as Hindi-Chinese and Japanese-Hindi, poses significant challenges due to scriptural and linguistic differences. This paper investigates the efficacy of romanization as a preprocessing step to bridge these gaps. We compare baseline models trained on native scripts with models incorporating romanization in three configurations: both-side, source-side only, and target-side only. Additionally, we introduce a script restoration model that converts romanized output back to native scripts, ensuring accurate evaluation. Our experiments show that romanization, particularly when applied to both sides, improves translation quality across the studied language pairs. The script restoration model further enhances the practicality of this approach by enabling evaluation in native scripts with some performance loss. This work provides insights into leveraging romanization for NMT in low-resource, cross-script settings, presenting a promising direction for under-researched language combinations.</abstract>
    <identifier type="citekey">soni-bhattacharyya-2024-romantra</identifier>
    <location>
      <url>https://aclanthology.org/2024.icon-1.18/</url>
    </location>
    <part>
      <date>2024-12</date>
      <extent unit="page">
        <start>157</start>
        <end>168</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T RoMantra: Optimizing Neural Machine Translation for Low-Resource Languages through Romanization
%A Soni, Govind
%A Bhattacharyya, Pushpak
%Y Lalitha Devi, Sobha
%Y Arora, Karunesh
%S Proceedings of the 21st International Conference on Natural Language Processing (ICON)
%D 2024
%8 December
%I NLP Association of India (NLPAI)
%C AU-KBC Research Centre, Chennai, India
%F soni-bhattacharyya-2024-romantra
%X Neural Machine Translation (NMT) for low-resource language pairs with distinct scripts, such as Hindi-Chinese and Japanese-Hindi, poses significant challenges due to scriptural and linguistic differences. This paper investigates the efficacy of romanization as a preprocessing step to bridge these gaps. We compare baseline models trained on native scripts with models incorporating romanization in three configurations: both-side, source-side only, and target-side only. Additionally, we introduce a script restoration model that converts romanized output back to native scripts, ensuring accurate evaluation. Our experiments show that romanization, particularly when applied to both sides, improves translation quality across the studied language pairs. The script restoration model further enhances the practicality of this approach by enabling evaluation in native scripts with some performance loss. This work provides insights into leveraging romanization for NMT in low-resource, cross-script settings, presenting a promising direction for under-researched language combinations.
%U https://aclanthology.org/2024.icon-1.18/
%P 157-168
Markdown (Informal)
[RoMantra: Optimizing Neural Machine Translation for Low-Resource Languages through Romanization](https://aclanthology.org/2024.icon-1.18/) (Soni & Bhattacharyya, ICON 2024)
ACL