@inproceedings{nguyen-etal-2023-effective,
title = "How effective is machine translation on low-resource code-switching? A case study comparing human and automatic metrics",
author = "Nguyen, Li and
Bryant, Christopher and
Mayeux, Oliver and
Yuan, Zheng",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2023",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.findings-acl.893",
doi = "10.18653/v1/2023.findings-acl.893",
pages = "14186--14195",
abstract = "This paper presents an investigation into the differences between processing monolingual input and code-switching (CSW) input in the context of machine translation (MT). Specifically, we compare the performance of three MT systems (Google, mBART-50 and M2M-100-big) in terms of their ability to translate monolingual Vietnamese, a low-resource language, and Vietnamese-English CSW respectively. To our knowledge, this is the first study to systematically analyse what might happen when multilingual MT systems are exposed to CSW data using both automatic and human metrics. We find that state-of-the-art neural translation systems not only achieve higher scores on automatic metrics when processing CSW input (compared to monolingual input), but also produce translations that are consistently rated as more semantically faithful by humans. We further suggest that automatic evaluation alone is insufficient for evaluating the translation of CSW input. Our findings establish a new benchmark that offers insights into the relationship between MT and CSW.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="nguyen-etal-2023-effective">
<titleInfo>
<title>How effective is machine translation on low-resource code-switching? A case study comparing human and automatic metrics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Li</namePart>
<namePart type="family">Nguyen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christopher</namePart>
<namePart type="family">Bryant</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Oliver</namePart>
<namePart type="family">Mayeux</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zheng</namePart>
<namePart type="family">Yuan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2023</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper presents an investigation into the differences between processing monolingual input and code-switching (CSW) input in the context of machine translation (MT). Specifically, we compare the performance of three MT systems (Google, mBART-50 and M2M-100-big) in terms of their ability to translate monolingual Vietnamese, a low-resource language, and Vietnamese-English CSW respectively. To our knowledge, this is the first study to systematically analyse what might happen when multilingual MT systems are exposed to CSW data using both automatic and human metrics. We find that state-of-the-art neural translation systems not only achieve higher scores on automatic metrics when processing CSW input (compared to monolingual input), but also produce translations that are consistently rated as more semantically faithful by humans. We further suggest that automatic evaluation alone is insufficient for evaluating the translation of CSW input. Our findings establish a new benchmark that offers insights into the relationship between MT and CSW.</abstract>
<identifier type="citekey">nguyen-etal-2023-effective</identifier>
<identifier type="doi">10.18653/v1/2023.findings-acl.893</identifier>
<location>
<url>https://aclanthology.org/2023.findings-acl.893</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>14186</start>
<end>14195</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T How effective is machine translation on low-resource code-switching? A case study comparing human and automatic metrics
%A Nguyen, Li
%A Bryant, Christopher
%A Mayeux, Oliver
%A Yuan, Zheng
%S Findings of the Association for Computational Linguistics: ACL 2023
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F nguyen-etal-2023-effective
%X This paper presents an investigation into the differences between processing monolingual input and code-switching (CSW) input in the context of machine translation (MT). Specifically, we compare the performance of three MT systems (Google, mBART-50 and M2M-100-big) in terms of their ability to translate monolingual Vietnamese, a low-resource language, and Vietnamese-English CSW respectively. To our knowledge, this is the first study to systematically analyse what might happen when multilingual MT systems are exposed to CSW data using both automatic and human metrics. We find that state-of-the-art neural translation systems not only achieve higher scores on automatic metrics when processing CSW input (compared to monolingual input), but also produce translations that are consistently rated as more semantically faithful by humans. We further suggest that automatic evaluation alone is insufficient for evaluating the translation of CSW input. Our findings establish a new benchmark that offers insights into the relationship between MT and CSW.
%R 10.18653/v1/2023.findings-acl.893
%U https://aclanthology.org/2023.findings-acl.893
%U https://doi.org/10.18653/v1/2023.findings-acl.893
%P 14186-14195
Markdown (Informal)
[How effective is machine translation on low-resource code-switching? A case study comparing human and automatic metrics](https://aclanthology.org/2023.findings-acl.893) (Nguyen et al., Findings 2023)
ACL