@inproceedings{hamada-etal-2025-akibanlp,
title = "{A}kiba{NLP}-{TUT}: Injecting Language-Specific Word-Level Noise for Low-Resource Language Translation",
author = "Hamada, Shoki and
Akiba, Tomoyosi and
Tsukada, Hajime",
editor = "Haddow, Barry and
Kocmi, Tom and
Koehn, Philipp and
Monz, Christof",
booktitle = "Proceedings of the Tenth Conference on Machine Translation",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.wmt-1.104/",
pages = "1259--1264",
ISBN = "979-8-89176-341-8",
abstract = "In this paper, we describe our system for the WMT 2025 Low-Resource Indic Language Translation Shared Task. The language directions addressed are Assamese{\ensuremath{\leftrightarrow}}English and Manipuri{\textrightarrow}English. We propose a method to improve translation performance from low-resource languages (LRLs) to English by injecting language-specific word-level noise into the parallel corpus of a closely related high-resource language (HRL). In the proposed method, word replacements are performed based on edit distance, using vocabulary and frequency information extracted from an LRL monolingual corpus. Experiments conducted on Assamese and Manipuri show that, in the absence of LRL parallel data, the proposed method outperforms both the w/o noise setting and existing approaches. Furthermore, we confirmed that increasing the size of the monolingual corpus used for noise injection leads to improved translation performance."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hamada-etal-2025-akibanlp">
<titleInfo>
<title>AkibaNLP-TUT: Injecting Language-Specific Word-Level Noise for Low-Resource Language Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Shoki</namePart>
<namePart type="family">Hamada</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tomoyosi</namePart>
<namePart type="family">Akiba</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hajime</namePart>
<namePart type="family">Tsukada</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Tenth Conference on Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Barry</namePart>
<namePart type="family">Haddow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tom</namePart>
<namePart type="family">Kocmi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philipp</namePart>
<namePart type="family">Koehn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christof</namePart>
<namePart type="family">Monz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-341-8</identifier>
</relatedItem>
<abstract>In this paper, we describe our system for the WMT 2025 Low-Resource Indic Language Translation Shared Task. The language directions addressed are Assamese↔English and Manipuri→English. We propose a method to improve translation performance from low-resource languages (LRLs) to English by injecting language-specific word-level noise into the parallel corpus of a closely related high-resource language (HRL). In the proposed method, word replacements are performed based on edit distance, using vocabulary and frequency information extracted from an LRL monolingual corpus. Experiments conducted on Assamese and Manipuri show that, in the absence of LRL parallel data, the proposed method outperforms both the w/o noise setting and existing approaches. Furthermore, we confirmed that increasing the size of the monolingual corpus used for noise injection leads to improved translation performance.</abstract>
<identifier type="citekey">hamada-etal-2025-akibanlp</identifier>
<location>
<url>https://aclanthology.org/2025.wmt-1.104/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>1259</start>
<end>1264</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T AkibaNLP-TUT: Injecting Language-Specific Word-Level Noise for Low-Resource Language Translation
%A Hamada, Shoki
%A Akiba, Tomoyosi
%A Tsukada, Hajime
%Y Haddow, Barry
%Y Kocmi, Tom
%Y Koehn, Philipp
%Y Monz, Christof
%S Proceedings of the Tenth Conference on Machine Translation
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-341-8
%F hamada-etal-2025-akibanlp
%X In this paper, we describe our system for the WMT 2025 Low-Resource Indic Language Translation Shared Task. The language directions addressed are Assamese↔English and Manipuri→English. We propose a method to improve translation performance from low-resource languages (LRLs) to English by injecting language-specific word-level noise into the parallel corpus of a closely related high-resource language (HRL). In the proposed method, word replacements are performed based on edit distance, using vocabulary and frequency information extracted from an LRL monolingual corpus. Experiments conducted on Assamese and Manipuri show that, in the absence of LRL parallel data, the proposed method outperforms both the w/o noise setting and existing approaches. Furthermore, we confirmed that increasing the size of the monolingual corpus used for noise injection leads to improved translation performance.
%U https://aclanthology.org/2025.wmt-1.104/
%P 1259-1264
Markdown (Informal)
[AkibaNLP-TUT: Injecting Language-Specific Word-Level Noise for Low-Resource Language Translation](https://aclanthology.org/2025.wmt-1.104/) (Hamada et al., WMT 2025)
ACL