@inproceedings{ahmed-buys-2024-neural,
title = "Neural Machine Translation between Low-Resource Languages with Synthetic Pivoting",
author = "Ahmed, Khalid and
Buys, Jan",
editor = "Calzolari, Nicoletta and
Kan, Min-Yen and
Hoste, Veronique and
Lenci, Alessandro and
Sakti, Sakriani and
Xue, Nianwen",
booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.lrec-main.1063",
pages = "12144--12158",
abstract = "Training neural models for translating between low-resource languages is challenging due to the scarcity of direct parallel data between such languages. Pivot-based neural machine translation (NMT) systems overcome data scarcity by including a high-resource pivot language in the process of translating between low-resource languages. We propose synthetic pivoting, a novel approach to pivot-based translation in which the pivot sentences are generated synthetically from both the source and target languages. Synthetic pivot sentences are generated through sequence-level knowledge distillation, with the aim of changing the structure of pivot sentences to be closer to that of the source or target languages, thereby reducing pivot translation complexity. We incorporate synthetic pivoting into two paradigms for pivoting: cascading and direct translation using synthetic source and target sentences. We find that the performance of pivot-based systems highly depends on the quality of the NMT model used for sentence regeneration. Furthermore, training back-translation models on these sentences can make the models more robust to input-side noise. The results show that synthetic data generation improves pivot-based systems translating between low-resource Southern African languages by up to 5.6 BLEU points after fine-tuning.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ahmed-buys-2024-neural">
<titleInfo>
<title>Neural Machine Translation between Low-Resource Languages with Synthetic Pivoting</title>
</titleInfo>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Ahmed</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Buys</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronique</namePart>
<namePart type="family">Hoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Training neural models for translating between low-resource languages is challenging due to the scarcity of direct parallel data between such languages. Pivot-based neural machine translation (NMT) systems overcome data scarcity by including a high-resource pivot language in the process of translating between low-resource languages. We propose synthetic pivoting, a novel approach to pivot-based translation in which the pivot sentences are generated synthetically from both the source and target languages. Synthetic pivot sentences are generated through sequence-level knowledge distillation, with the aim of changing the structure of pivot sentences to be closer to that of the source or target languages, thereby reducing pivot translation complexity. We incorporate synthetic pivoting into two paradigms for pivoting: cascading and direct translation using synthetic source and target sentences. We find that the performance of pivot-based systems highly depends on the quality of the NMT model used for sentence regeneration. Furthermore, training back-translation models on these sentences can make the models more robust to input-side noise. The results show that synthetic data generation improves pivot-based systems translating between low-resource Southern African languages by up to 5.6 BLEU points after fine-tuning.</abstract>
<identifier type="citekey">ahmed-buys-2024-neural</identifier>
<location>
<url>https://aclanthology.org/2024.lrec-main.1063</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>12144</start>
<end>12158</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Neural Machine Translation between Low-Resource Languages with Synthetic Pivoting
%A Ahmed, Khalid
%A Buys, Jan
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F ahmed-buys-2024-neural
%X Training neural models for translating between low-resource languages is challenging due to the scarcity of direct parallel data between such languages. Pivot-based neural machine translation (NMT) systems overcome data scarcity by including a high-resource pivot language in the process of translating between low-resource languages. We propose synthetic pivoting, a novel approach to pivot-based translation in which the pivot sentences are generated synthetically from both the source and target languages. Synthetic pivot sentences are generated through sequence-level knowledge distillation, with the aim of changing the structure of pivot sentences to be closer to that of the source or target languages, thereby reducing pivot translation complexity. We incorporate synthetic pivoting into two paradigms for pivoting: cascading and direct translation using synthetic source and target sentences. We find that the performance of pivot-based systems highly depends on the quality of the NMT model used for sentence regeneration. Furthermore, training back-translation models on these sentences can make the models more robust to input-side noise. The results show that synthetic data generation improves pivot-based systems translating between low-resource Southern African languages by up to 5.6 BLEU points after fine-tuning.
%U https://aclanthology.org/2024.lrec-main.1063
%P 12144-12158
Markdown (Informal)
[Neural Machine Translation between Low-Resource Languages with Synthetic Pivoting](https://aclanthology.org/2024.lrec-main.1063) (Ahmed & Buys, LREC-COLING 2024)
ACL