% CantonMT paper in the EAMT 2024 proceedings (Volume 1), pp. 590--599.
@inproceedings{hong-etal-2024-cantonmt,
    title = "{CantonMT}: {Cantonese} to {English} {NMT} Platform with Fine-Tuned Models using Real and Synthetic Back-Translation Data",
    author = "Hong, Kung and
      Han, Lifeng and
      Batista-Navarro, Riza and
      Nenadic, Goran",
    editor = "Scarton, Carolina and
      Prescott, Charlotte and
      Bayliss, Chris and
      Oakley, Chris and
      Wright, Joanna and
      Wrigley, Stuart and
      Song, Xingyi and
      Gow-Smith, Edward and
      Bawden, Rachel and
      S{\'a}nchez-Cartagena, V{\'\i}ctor M. and
      Cadwell, Patrick and
      Lapshinova-Koltunski, Ekaterina and
      Cabarr{\~a}o, Vera and
      Chatzitheodorou, Konstantinos and
      Nurminen, Mary and
      Kanojia, Diptesh and
      Moniz, Helena",
    booktitle = "Proceedings of the 25th Annual Conference of the European Association for Machine Translation (Volume 1)",
    month = jun,
    year = "2024",
    address = "Sheffield, UK",
    publisher = "European Association for Machine Translation (EAMT)",
    url = "https://aclanthology.org/2024.eamt-1.49",
    pages = "590--599",
    abstract = "Neural Machine Translation (NMT) for low-resource languages remains a challenge for many NLP researchers. In this work, we deploy a standard data augmentation methodology by back-translation to a new language translation direction, i.e., Cantonese-to-English. We present the models we fine-tuned using the limited amount of real data and the synthetic data we generated using back-translation by three models: OpusMT, NLLB, and mBART. We carried out automatic evaluation using a range of different metrics including those that are lexical-based and embedding-based. Furthermore, we create a user-friendly interface for the models we included in this project, CantonMT, and make it available to facilitate Cantonese-to-English MT research. Researchers can add more models to this platform via our open-source CantonMT toolkit, available at \url{https://github.com/kenrickkung/CantoneseTranslation}.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hong-etal-2024-cantonmt">
<titleInfo>
<title>CantonMT: Cantonese to English NMT Platform with Fine-Tuned Models using Real and Synthetic Back-Translation Data</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kung</namePart>
<namePart type="family">Hong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lifeng</namePart>
<namePart type="family">Han</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Riza</namePart>
<namePart type="family">Batista-Navarro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Goran</namePart>
<namePart type="family">Nenadic</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 25th Annual Conference of the European Association for Machine Translation (Volume 1)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Carolina</namePart>
<namePart type="family">Scarton</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Charlotte</namePart>
<namePart type="family">Prescott</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chris</namePart>
<namePart type="family">Bayliss</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chris</namePart>
<namePart type="family">Oakley</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joanna</namePart>
<namePart type="family">Wright</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stuart</namePart>
<namePart type="family">Wrigley</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xingyi</namePart>
<namePart type="family">Song</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Edward</namePart>
<namePart type="family">Gow-Smith</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rachel</namePart>
<namePart type="family">Bawden</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Víctor</namePart>
<namePart type="given">M.</namePart>
<namePart type="family">Sánchez-Cartagena</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Patrick</namePart>
<namePart type="family">Cadwell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Lapshinova-Koltunski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vera</namePart>
<namePart type="family">Cabarrão</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Konstantinos</namePart>
<namePart type="family">Chatzitheodorou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mary</namePart>
<namePart type="family">Nurminen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Diptesh</namePart>
<namePart type="family">Kanojia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Helena</namePart>
<namePart type="family">Moniz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Association for Machine Translation (EAMT)</publisher>
<place>
<placeTerm type="text">Sheffield, UK</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Neural Machine Translation (NMT) for low-resource languages remains a challenge for many NLP researchers. In this work, we deploy a standard data augmentation methodology by back-translation to a new language translation direction, i.e., Cantonese-to-English. We present the models we fine-tuned using the limited amount of real data and the synthetic data we generated using back-translation by three models: OpusMT, NLLB, and mBART. We carried out automatic evaluation using a range of different metrics including those that are lexical-based and embedding-based. Furthermore, we create a user-friendly interface for the models we included in this project, CantonMT, and make it available to facilitate Cantonese-to-English MT research. Researchers can add more models to this platform via our open-source CantonMT toolkit, available at https://github.com/kenrickkung/CantoneseTranslation.</abstract>
<identifier type="citekey">hong-etal-2024-cantonmt</identifier>
<location>
<url>https://aclanthology.org/2024.eamt-1.49</url>
</location>
<part>
<date>2024-06</date>
<extent unit="page">
<start>590</start>
<end>599</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T CantonMT: Cantonese to English NMT Platform with Fine-Tuned Models using Real and Synthetic Back-Translation Data
%A Hong, Kung
%A Han, Lifeng
%A Batista-Navarro, Riza
%A Nenadic, Goran
%Y Scarton, Carolina
%Y Prescott, Charlotte
%Y Bayliss, Chris
%Y Oakley, Chris
%Y Wright, Joanna
%Y Wrigley, Stuart
%Y Song, Xingyi
%Y Gow-Smith, Edward
%Y Bawden, Rachel
%Y Sánchez-Cartagena, Víctor M.
%Y Cadwell, Patrick
%Y Lapshinova-Koltunski, Ekaterina
%Y Cabarrão, Vera
%Y Chatzitheodorou, Konstantinos
%Y Nurminen, Mary
%Y Kanojia, Diptesh
%Y Moniz, Helena
%S Proceedings of the 25th Annual Conference of the European Association for Machine Translation (Volume 1)
%D 2024
%8 June
%I European Association for Machine Translation (EAMT)
%C Sheffield, UK
%F hong-etal-2024-cantonmt
%X Neural Machine Translation (NMT) for low-resource languages remains a challenge for many NLP researchers. In this work, we deploy a standard data augmentation methodology by back-translation to a new language translation direction, i.e., Cantonese-to-English. We present the models we fine-tuned using the limited amount of real data and the synthetic data we generated using back-translation by three models: OpusMT, NLLB, and mBART. We carried out automatic evaluation using a range of different metrics including those that are lexical-based and embedding-based. Furthermore, we create a user-friendly interface for the models we included in this project, CantonMT, and make it available to facilitate Cantonese-to-English MT research. Researchers can add more models to this platform via our open-source CantonMT toolkit, available at https://github.com/kenrickkung/CantoneseTranslation.
%U https://aclanthology.org/2024.eamt-1.49
%P 590-599
Markdown (Informal)
[CantonMT: Cantonese to English NMT Platform with Fine-Tuned Models using Real and Synthetic Back-Translation Data](https://aclanthology.org/2024.eamt-1.49) (Hong et al., EAMT 2024)
ACL