% BibTeX entry for ACL Anthology paper 2024.findings-emnlp.351 (Findings of EMNLP 2024).
@inproceedings{pengpun-etal-2024-creating,
    title = "On Creating an {E}nglish-{T}hai Code-switched Machine Translation in Medical Domain",
    author = "Pengpun, Parinthapat and
      Tiankanon, Krittamate and
      Chinkamol, Amrest and
      Kinchagawat, Jiramet and
      Chairuengjitjaras, Pitchaya and
      Supholkhan, Pasit and
      Aussavavirojekul, Pubordee and
      Boonnag, Chiraphat and
      Veerakanjana, Kanyakorn and
      Phimsiri, Hirunkul and
      Sae-jia, Boonthicha and
      Sataudom, Nattawach and
      Ittichaiwong, Piyalitt and
      Limkonchotiwat, Peerat",
    editor = "Al-Onaizan, Yaser and
      Bansal, Mohit and
      Chen, Yun-Nung",
    booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2024",
    month = nov,
    year = "2024",
    address = "Miami, Florida, USA",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.findings-emnlp.351",
    pages = "6055--6073",
    abstract = "Machine translation (MT) in the medical domain plays a pivotal role in enhancing healthcare quality and disseminating medical knowledge. Despite advancements in English-Thai MT technology, common MT approaches often underperform in the medical field due to their inability to precisely translate medical terminologies. Our research prioritizes not merely improving translation accuracy but also maintaining medical terminology in English within the translated text through code-switched (CS) translation. We developed a method to produce CS medical translation data, fine-tuned a CS translation model with this data, and evaluated its performance against strong baselines, such as Google Neural Machine Translation (NMT) and GPT-3.5/GPT-4. Our model demonstrated competitive performance in automatic metrics and was highly favored in human preference evaluations. Our evaluation result also shows that medical professionals significantly prefer CS translations that maintain critical English terms accurately, even if it slightly compromises fluency. Our code and test set are publicly available https://github.com/preceptorai-org/NLLB{\_}CS{\_}EM{\_}NLP2024.",
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 bibliographic record for ACL Anthology paper 2024.findings-emnlp.351. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="pengpun-etal-2024-creating">
    <titleInfo>
      <title>On Creating an English-Thai Code-switched Machine Translation in Medical Domain</title>
    </titleInfo>

    <!-- Authors of the paper -->
    <name type="personal">
      <namePart type="given">Parinthapat</namePart>
      <namePart type="family">Pengpun</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Krittamate</namePart>
      <namePart type="family">Tiankanon</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Amrest</namePart>
      <namePart type="family">Chinkamol</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jiramet</namePart>
      <namePart type="family">Kinchagawat</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Pitchaya</namePart>
      <namePart type="family">Chairuengjitjaras</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Pasit</namePart>
      <namePart type="family">Supholkhan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Pubordee</namePart>
      <namePart type="family">Aussavavirojekul</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Chiraphat</namePart>
      <namePart type="family">Boonnag</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Kanyakorn</namePart>
      <namePart type="family">Veerakanjana</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Hirunkul</namePart>
      <namePart type="family">Phimsiri</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Boonthicha</namePart>
      <namePart type="family">Sae-jia</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Nattawach</namePart>
      <namePart type="family">Sataudom</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Piyalitt</namePart>
      <namePart type="family">Ittichaiwong</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Peerat</namePart>
      <namePart type="family">Limkonchotiwat</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>

    <originInfo>
      <dateIssued>2024-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume, its editors and publisher -->
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: EMNLP 2024</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Yaser</namePart>
        <namePart type="family">Al-Onaizan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mohit</namePart>
        <namePart type="family">Bansal</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yun-Nung</namePart>
        <namePart type="family">Chen</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Miami, Florida, USA</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>

    <!-- Abstract text is kept on a single line so no formatting whitespace leaks into the content -->
    <abstract>Machine translation (MT) in the medical domain plays a pivotal role in enhancing healthcare quality and disseminating medical knowledge. Despite advancements in English-Thai MT technology, common MT approaches often underperform in the medical field due to their inability to precisely translate medical terminologies. Our research prioritizes not merely improving translation accuracy but also maintaining medical terminology in English within the translated text through code-switched (CS) translation. We developed a method to produce CS medical translation data, fine-tuned a CS translation model with this data, and evaluated its performance against strong baselines, such as Google Neural Machine Translation (NMT) and GPT-3.5/GPT-4. Our model demonstrated competitive performance in automatic metrics and was highly favored in human preference evaluations. Our evaluation result also shows that medical professionals significantly prefer CS translations that maintain critical English terms accurately, even if it slightly compromises fluency. Our code and test set are publicly available https://github.com/preceptorai-org/NLLB_CS_EM_NLP2024.</abstract>
    <identifier type="citekey">pengpun-etal-2024-creating</identifier>
    <location>
      <url>https://aclanthology.org/2024.findings-emnlp.351</url>
    </location>
    <part>
      <date>2024-11</date>
      <extent unit="page">
        <start>6055</start>
        <end>6073</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T On Creating an English-Thai Code-switched Machine Translation in Medical Domain
%A Pengpun, Parinthapat
%A Tiankanon, Krittamate
%A Chinkamol, Amrest
%A Kinchagawat, Jiramet
%A Chairuengjitjaras, Pitchaya
%A Supholkhan, Pasit
%A Aussavavirojekul, Pubordee
%A Boonnag, Chiraphat
%A Veerakanjana, Kanyakorn
%A Phimsiri, Hirunkul
%A Sae-jia, Boonthicha
%A Sataudom, Nattawach
%A Ittichaiwong, Piyalitt
%A Limkonchotiwat, Peerat
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Findings of the Association for Computational Linguistics: EMNLP 2024
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F pengpun-etal-2024-creating
%X Machine translation (MT) in the medical domain plays a pivotal role in enhancing healthcare quality and disseminating medical knowledge. Despite advancements in English-Thai MT technology, common MT approaches often underperform in the medical field due to their inability to precisely translate medical terminologies. Our research prioritizes not merely improving translation accuracy but also maintaining medical terminology in English within the translated text through code-switched (CS) translation. We developed a method to produce CS medical translation data, fine-tuned a CS translation model with this data, and evaluated its performance against strong baselines, such as Google Neural Machine Translation (NMT) and GPT-3.5/GPT-4. Our model demonstrated competitive performance in automatic metrics and was highly favored in human preference evaluations. Our evaluation result also shows that medical professionals significantly prefer CS translations that maintain critical English terms accurately, even if it slightly compromises fluency. Our code and test set are publicly available https://github.com/preceptorai-org/NLLB_CS_EM_NLP2024.
%U https://aclanthology.org/2024.findings-emnlp.351
%P 6055-6073
Markdown (Informal)
[On Creating an English-Thai Code-switched Machine Translation in Medical Domain](https://aclanthology.org/2024.findings-emnlp.351) (Pengpun et al., Findings 2024)
ACL
- Parinthapat Pengpun, Krittamate Tiankanon, Amrest Chinkamol, Jiramet Kinchagawat, Pitchaya Chairuengjitjaras, Pasit Supholkhan, Pubordee Aussavavirojekul, Chiraphat Boonnag, Kanyakorn Veerakanjana, Hirunkul Phimsiri, Boonthicha Sae-jia, Nattawach Sataudom, Piyalitt Ittichaiwong, and Peerat Limkonchotiwat. 2024. On Creating an English-Thai Code-switched Machine Translation in Medical Domain. In Findings of the Association for Computational Linguistics: EMNLP 2024, pages 6055–6073, Miami, Florida, USA. Association for Computational Linguistics.