@inproceedings{hauhio-friberg-2024-mitra,
title = "Mitra: Improving Terminologically Constrained Translation Quality with Backtranslations and Flag Diacritics",
author = "Hauhio, Iikka and
Friberg, Th{\'e}o",
editor = "Scarton, Carolina and
Prescott, Charlotte and
Bayliss, Chris and
Oakley, Chris and
Wright, Joanna and
Wrigley, Stuart and
Song, Xingyi and
Gow-Smith, Edward and
Bawden, Rachel and
S{\'a}nchez-Cartagena, V{\'\i}ctor M and
Cadwell, Patrick and
Lapshinova-Koltunski, Ekaterina and
Cabarr{\~a}o, Vera and
Chatzitheodorou, Konstantinos and
Nurminen, Mary and
Kanojia, Diptesh and
Moniz, Helena",
booktitle = "Proceedings of the 25th Annual Conference of the European Association for Machine Translation (Volume 1)",
month = jun,
year = "2024",
address = "Sheffield, UK",
publisher = "European Association for Machine Translation (EAMT)",
url = "https://aclanthology.org/2024.eamt-1.12",
pages = "100--115",
abstract = "Terminologically constrained machine translation is a hot topic in the field of neural machine translation. One major way to categorize constrained translation methods is to divide them into {``}hard{''} constraints that are forced into the target language sentence using a special decoding algorithm, and {``}soft{''} constraints that are included in the input given to the model. We present a constrained translation pipeline that combines soft and hard constraints while being completely model-agnostic, i.e. our method can be used with any NMT or LLM model. In the {``}soft{''} part, we substitute the source language terms in the input sentence for the backtranslations of their target language equivalents. This causes the source sentence to be more similar to the intended translation, thus making it easier to translate for the model. In the {``}hard{''} part, we use a novel nondeterministic finite state transducer-based (NDFST) constraint recognition algorithm utilizing flag diacritics to force the model to use the desired target language terms. We test our model with both Finnish{--}English and English{--}Finnish real-world vocabularies. We find that our methods consistently improve the translation quality when compared to previous constrained decoding algorithms, while the improvement over unconstrained translations depends on the familiarity of the model over the subject vocabulary and the quality of the vocabulary.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hauhio-friberg-2024-mitra">
<titleInfo>
<title>Mitra: Improving Terminologically Constrained Translation Quality with Backtranslations and Flag Diacritics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Iikka</namePart>
<namePart type="family">Hauhio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Théo</namePart>
<namePart type="family">Friberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 25th Annual Conference of the European Association for Machine Translation (Volume 1)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Carolina</namePart>
<namePart type="family">Scarton</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Charlotte</namePart>
<namePart type="family">Prescott</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chris</namePart>
<namePart type="family">Bayliss</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chris</namePart>
<namePart type="family">Oakley</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joanna</namePart>
<namePart type="family">Wright</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stuart</namePart>
<namePart type="family">Wrigley</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xingyi</namePart>
<namePart type="family">Song</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Edward</namePart>
<namePart type="family">Gow-Smith</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rachel</namePart>
<namePart type="family">Bawden</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Víctor</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Sánchez-Cartagena</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Patrick</namePart>
<namePart type="family">Cadwell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Lapshinova-Koltunski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vera</namePart>
<namePart type="family">Cabarrão</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Konstantinos</namePart>
<namePart type="family">Chatzitheodorou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mary</namePart>
<namePart type="family">Nurminen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Diptesh</namePart>
<namePart type="family">Kanojia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Helena</namePart>
<namePart type="family">Moniz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Association for Machine Translation (EAMT)</publisher>
<place>
<placeTerm type="text">Sheffield, UK</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Terminologically constrained machine translation is a hot topic in the field of neural machine translation. One major way to categorize constrained translation methods is to divide them into “hard” constraints that are forced into the target language sentence using a special decoding algorithm, and “soft” constraints that are included in the input given to the model. We present a constrained translation pipeline that combines soft and hard constraints while being completely model-agnostic, i.e. our method can be used with any NMT or LLM model. In the “soft” part, we substitute the source language terms in the input sentence for the backtranslations of their target language equivalents. This causes the source sentence to be more similar to the intended translation, thus making it easier to translate for the model. In the “hard” part, we use a novel nondeterministic finite state transducer-based (NDFST) constraint recognition algorithm utilizing flag diacritics to force the model to use the desired target language terms. We test our model with both Finnish–English and English–Finnish real-world vocabularies. We find that our methods consistently improve the translation quality when compared to previous constrained decoding algorithms, while the improvement over unconstrained translations depends on the familiarity of the model over the subject vocabulary and the quality of the vocabulary.</abstract>
<identifier type="citekey">hauhio-friberg-2024-mitra</identifier>
<location>
<url>https://aclanthology.org/2024.eamt-1.12</url>
</location>
<part>
<date>2024-06</date>
<extent unit="page">
<start>100</start>
<end>115</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Mitra: Improving Terminologically Constrained Translation Quality with Backtranslations and Flag Diacritics
%A Hauhio, Iikka
%A Friberg, Théo
%Y Scarton, Carolina
%Y Prescott, Charlotte
%Y Bayliss, Chris
%Y Oakley, Chris
%Y Wright, Joanna
%Y Wrigley, Stuart
%Y Song, Xingyi
%Y Gow-Smith, Edward
%Y Bawden, Rachel
%Y Sánchez-Cartagena, Víctor M.
%Y Cadwell, Patrick
%Y Lapshinova-Koltunski, Ekaterina
%Y Cabarrão, Vera
%Y Chatzitheodorou, Konstantinos
%Y Nurminen, Mary
%Y Kanojia, Diptesh
%Y Moniz, Helena
%S Proceedings of the 25th Annual Conference of the European Association for Machine Translation (Volume 1)
%D 2024
%8 June
%I European Association for Machine Translation (EAMT)
%C Sheffield, UK
%F hauhio-friberg-2024-mitra
%X Terminologically constrained machine translation is a hot topic in the field of neural machine translation. One major way to categorize constrained translation methods is to divide them into “hard” constraints that are forced into the target language sentence using a special decoding algorithm, and “soft” constraints that are included in the input given to the model. We present a constrained translation pipeline that combines soft and hard constraints while being completely model-agnostic, i.e. our method can be used with any NMT or LLM model. In the “soft” part, we substitute the source language terms in the input sentence for the backtranslations of their target language equivalents. This causes the source sentence to be more similar to the intended translation, thus making it easier to translate for the model. In the “hard” part, we use a novel nondeterministic finite state transducer-based (NDFST) constraint recognition algorithm utilizing flag diacritics to force the model to use the desired target language terms. We test our model with both Finnish–English and English–Finnish real-world vocabularies. We find that our methods consistently improve the translation quality when compared to previous constrained decoding algorithms, while the improvement over unconstrained translations depends on the familiarity of the model over the subject vocabulary and the quality of the vocabulary.
%U https://aclanthology.org/2024.eamt-1.12
%P 100-115
Markdown (Informal)
[Mitra: Improving Terminologically Constrained Translation Quality with Backtranslations and Flag Diacritics](https://aclanthology.org/2024.eamt-1.12) (Hauhio & Friberg, EAMT 2024)
ACL