@inproceedings{merx-etal-2025-tulun,
title = "Tulun: Transparent and Adaptable Low-resource Machine Translation",
author = "Merx, Raphael and
Suominen, Hanna and
Hong, Lois Yinghui and
Thieberger, Nick and
Cohn, Trevor and
Vylomova, Ekaterina",
editor = "Mishra, Pushkar and
Muresan, Smaranda and
Yu, Tao",
booktitle = "Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 3: System Demonstrations)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.acl-demo.13/",
doi = "10.18653/v1/2025.acl-demo.13",
pages = "129--139",
ISBN = "979-8-89176-253-4",
abstract = "Machine translation (MT) systems that support low-resource languages often struggle on specialized domains. While researchers have proposed various techniques for domain adaptation, these approaches typically require model fine-tuning, making them impractical for non-technical users and small organizations. To address this gap, we propose Tulun, a versatile solution for terminology-aware translation, combining neural MT with large language model (LLM)-based post-editing guided by existing glossaries and translation memories.Our open-source web-based platform enables users to easily create, edit, and leverage terminology resources, fostering a collaborative human-machine translation process that respects and incorporates domain expertise while increasing MT accuracy.Evaluations show effectiveness in both real-world and benchmark scenarios: on medical and disaster relief translation tasks for Tetun and Bislama, our system achieves improvements of 16.90-22.41 ChrF++ points over baseline MT systems. Across six low-resource languages on the FLORES dataset, Tulun outperforms both standalone MT and LLM approaches, achieving an average improvement of 2.8 ChrF++ points over NLLB-54B. Tulun is publicly accessible at https://bislama-trans.rapha.dev."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="merx-etal-2025-tulun">
<titleInfo>
<title>Tulun: Transparent and Adaptable Low-resource Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Raphael</namePart>
<namePart type="family">Merx</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hanna</namePart>
<namePart type="family">Suominen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lois</namePart>
<namePart type="given">Yinghui</namePart>
<namePart type="family">Hong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nick</namePart>
<namePart type="family">Thieberger</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Trevor</namePart>
<namePart type="family">Cohn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Vylomova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 3: System Demonstrations)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Pushkar</namePart>
<namePart type="family">Mishra</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Smaranda</namePart>
<namePart type="family">Muresan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tao</namePart>
<namePart type="family">Yu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-253-4</identifier>
</relatedItem>
<abstract>Machine translation (MT) systems that support low-resource languages often struggle on specialized domains. While researchers have proposed various techniques for domain adaptation, these approaches typically require model fine-tuning, making them impractical for non-technical users and small organizations. To address this gap, we propose Tulun, a versatile solution for terminology-aware translation, combining neural MT with large language model (LLM)-based post-editing guided by existing glossaries and translation memories.Our open-source web-based platform enables users to easily create, edit, and leverage terminology resources, fostering a collaborative human-machine translation process that respects and incorporates domain expertise while increasing MT accuracy.Evaluations show effectiveness in both real-world and benchmark scenarios: on medical and disaster relief translation tasks for Tetun and Bislama, our system achieves improvements of 16.90-22.41 ChrF++ points over baseline MT systems. Across six low-resource languages on the FLORES dataset, Tulun outperforms both standalone MT and LLM approaches, achieving an average improvement of 2.8 ChrF++ points over NLLB-54B. Tulun is publicly accessible at https://bislama-trans.rapha.dev.</abstract>
<identifier type="citekey">merx-etal-2025-tulun</identifier>
<identifier type="doi">10.18653/v1/2025.acl-demo.13</identifier>
<location>
<url>https://aclanthology.org/2025.acl-demo.13/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>129</start>
<end>139</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Tulun: Transparent and Adaptable Low-resource Machine Translation
%A Merx, Raphael
%A Suominen, Hanna
%A Hong, Lois Yinghui
%A Thieberger, Nick
%A Cohn, Trevor
%A Vylomova, Ekaterina
%Y Mishra, Pushkar
%Y Muresan, Smaranda
%Y Yu, Tao
%S Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 3: System Demonstrations)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-253-4
%F merx-etal-2025-tulun
%X Machine translation (MT) systems that support low-resource languages often struggle on specialized domains. While researchers have proposed various techniques for domain adaptation, these approaches typically require model fine-tuning, making them impractical for non-technical users and small organizations. To address this gap, we propose Tulun, a versatile solution for terminology-aware translation, combining neural MT with large language model (LLM)-based post-editing guided by existing glossaries and translation memories.Our open-source web-based platform enables users to easily create, edit, and leverage terminology resources, fostering a collaborative human-machine translation process that respects and incorporates domain expertise while increasing MT accuracy.Evaluations show effectiveness in both real-world and benchmark scenarios: on medical and disaster relief translation tasks for Tetun and Bislama, our system achieves improvements of 16.90-22.41 ChrF++ points over baseline MT systems. Across six low-resource languages on the FLORES dataset, Tulun outperforms both standalone MT and LLM approaches, achieving an average improvement of 2.8 ChrF++ points over NLLB-54B. Tulun is publicly accessible at https://bislama-trans.rapha.dev.
%R 10.18653/v1/2025.acl-demo.13
%U https://aclanthology.org/2025.acl-demo.13/
%U https://doi.org/10.18653/v1/2025.acl-demo.13
%P 129-139
Markdown (Informal)
[Tulun: Transparent and Adaptable Low-resource Machine Translation](https://aclanthology.org/2025.acl-demo.13/) (Merx et al., ACL 2025)
ACL
- Raphael Merx, Hanna Suominen, Lois Yinghui Hong, Nick Thieberger, Trevor Cohn, and Ekaterina Vylomova. 2025. Tulun: Transparent and Adaptable Low-resource Machine Translation. In Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 3: System Demonstrations), pages 129–139, Vienna, Austria. Association for Computational Linguistics.