@inproceedings{roque-etal-2024-samsung,
title = "{S}amsung {R}{\&}{D} Institute {P}hilippines @ {WMT} 2024 {I}ndic {MT} Task",
author = "Roque, Matthew Theodore and
Catalan, Carlos Rafael and
Velasco, Dan John and
Rufino, Manuel Antonio and
Cruz, Jan Christian Blaise",
editor = "Haddow, Barry and
Kocmi, Tom and
Koehn, Philipp and
Monz, Christof",
booktitle = "Proceedings of the Ninth Conference on Machine Translation",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.wmt-1.62",
doi = "10.18653/v1/2024.wmt-1.62",
pages = "735--741",
abstract = "This paper presents the methodology developed by the Samsung R{\&}D Institute Philippines (SRPH) Language Intelligence Team (LIT) for the WMT 2024 Shared Task on Low-Resource Indic Language Translation. We trained standard sequence-to-sequence Transformer models from scratch for both English-to-Indic and Indic-to-English translation directions. Additionally, we explored data augmentation through backtranslation and the application of noisy channel reranking to improve translation quality. A multilingual model trained across all language pairs was also investigated. Our results demonstrate the effectiveness of the multilingual model, with significant performance improvements observed in most language pairs, highlighting the potential of shared language representations in low-resource translation scenarios.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="roque-etal-2024-samsung">
<titleInfo>
<title>Samsung R&amp;D Institute Philippines @ WMT 2024 Indic MT Task</title>
</titleInfo>
<name type="personal">
<namePart type="given">Matthew</namePart>
<namePart type="given">Theodore</namePart>
<namePart type="family">Roque</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carlos</namePart>
<namePart type="given">Rafael</namePart>
<namePart type="family">Catalan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dan</namePart>
<namePart type="given">John</namePart>
<namePart type="family">Velasco</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Manuel</namePart>
<namePart type="given">Antonio</namePart>
<namePart type="family">Rufino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="given">Christian</namePart>
<namePart type="given">Blaise</namePart>
<namePart type="family">Cruz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Ninth Conference on Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Barry</namePart>
<namePart type="family">Haddow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tom</namePart>
<namePart type="family">Kocmi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philipp</namePart>
<namePart type="family">Koehn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christof</namePart>
<namePart type="family">Monz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper presents the methodology developed by the Samsung R&amp;D Institute Philippines (SRPH) Language Intelligence Team (LIT) for the WMT 2024 Shared Task on Low-Resource Indic Language Translation. We trained standard sequence-to-sequence Transformer models from scratch for both English-to-Indic and Indic-to-English translation directions. Additionally, we explored data augmentation through backtranslation and the application of noisy channel reranking to improve translation quality. A multilingual model trained across all language pairs was also investigated. Our results demonstrate the effectiveness of the multilingual model, with significant performance improvements observed in most language pairs, highlighting the potential of shared language representations in low-resource translation scenarios.</abstract>
<identifier type="citekey">roque-etal-2024-samsung</identifier>
<identifier type="doi">10.18653/v1/2024.wmt-1.62</identifier>
<location>
<url>https://aclanthology.org/2024.wmt-1.62</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>735</start>
<end>741</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Samsung R&D Institute Philippines @ WMT 2024 Indic MT Task
%A Roque, Matthew Theodore
%A Catalan, Carlos Rafael
%A Velasco, Dan John
%A Rufino, Manuel Antonio
%A Cruz, Jan Christian Blaise
%Y Haddow, Barry
%Y Kocmi, Tom
%Y Koehn, Philipp
%Y Monz, Christof
%S Proceedings of the Ninth Conference on Machine Translation
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F roque-etal-2024-samsung
%X This paper presents the methodology developed by the Samsung R&D Institute Philippines (SRPH) Language Intelligence Team (LIT) for the WMT 2024 Shared Task on Low-Resource Indic Language Translation. We trained standard sequence-to-sequence Transformer models from scratch for both English-to-Indic and Indic-to-English translation directions. Additionally, we explored data augmentation through backtranslation and the application of noisy channel reranking to improve translation quality. A multilingual model trained across all language pairs was also investigated. Our results demonstrate the effectiveness of the multilingual model, with significant performance improvements observed in most language pairs, highlighting the potential of shared language representations in low-resource translation scenarios.
%R 10.18653/v1/2024.wmt-1.62
%U https://aclanthology.org/2024.wmt-1.62
%U https://doi.org/10.18653/v1/2024.wmt-1.62
%P 735-741
Markdown (Informal)
[Samsung R&D Institute Philippines @ WMT 2024 Indic MT Task](https://aclanthology.org/2024.wmt-1.62) (Roque et al., WMT 2024)
ACL
- Matthew Theodore Roque, Carlos Rafael Catalan, Dan John Velasco, Manuel Antonio Rufino, and Jan Christian Blaise Cruz. 2024. Samsung R&D Institute Philippines @ WMT 2024 Indic MT Task. In Proceedings of the Ninth Conference on Machine Translation, pages 735–741, Miami, Florida, USA. Association for Computational Linguistics.