@inproceedings{lankford-etal-2021-machine,
title = "Machine Translation in the Covid domain: an {E}nglish-{I}rish case study for {L}o{R}es{MT} 2021",
author = "Lankford, Seamus and
Afli, Haithem and
Way, Andy",
editor = "Ortega, John and
Ojha, Atul Kr. and
Kann, Katharina and
Liu, Chao-Hong",
booktitle = "Proceedings of the 4th Workshop on Technologies for MT of Low Resource Languages (LoResMT2021)",
month = aug,
year = "2021",
address = "Virtual",
publisher = "Association for Machine Translation in the Americas",
url = "https://aclanthology.org/2021.mtsummit-loresmt.15/",
pages = "144--150",
abstract = "Translation models for the specific domain of translating Covid data from English to Irish were developed for the LoResMT 2021 shared task. Domain adaptation techniques, using a Covid-adapted generic 55k corpus from the Directorate General of Translation, were applied. Fine-tuning, mixed fine-tuning and combined dataset approaches were compared with models trained on an extended in-domain dataset. As part of this study, an English-Irish dataset of Covid related data, from the Health and Education domains, was developed. The highest performing model used a Transformer architecture trained with an extended in-domain Covid dataset. In the context of this study, we have demonstrated that extending an 8k in-domain baseline dataset by just 5k lines improved the BLEU score by 27 points."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="lankford-etal-2021-machine">
<titleInfo>
<title>Machine Translation in the Covid domain: an English-Irish case study for LoResMT 2021</title>
</titleInfo>
<name type="personal">
<namePart type="given">Seamus</namePart>
<namePart type="family">Lankford</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Haithem</namePart>
<namePart type="family">Afli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andy</namePart>
<namePart type="family">Way</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 4th Workshop on Technologies for MT of Low Resource Languages (LoResMT2021)</title>
</titleInfo>
<name type="personal">
<namePart type="given">John</namePart>
<namePart type="family">Ortega</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Atul</namePart>
<namePart type="given">Kr.</namePart>
<namePart type="family">Ojha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Katharina</namePart>
<namePart type="family">Kann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chao-Hong</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Machine Translation in the Americas</publisher>
<place>
<placeTerm type="text">Virtual</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Translation models for the specific domain of translating Covid data from English to Irish were developed for the LoResMT 2021 shared task. Domain adaptation techniques, using a Covid-adapted generic 55k corpus from the Directorate General of Translation, were applied. Fine-tuning, mixed fine-tuning and combined dataset approaches were compared with models trained on an extended in-domain dataset. As part of this study, an English-Irish dataset of Covid related data, from the Health and Education domains, was developed. The highest performing model used a Transformer architecture trained with an extended in-domain Covid dataset. In the context of this study, we have demonstrated that extending an 8k in-domain baseline dataset by just 5k lines improved the BLEU score by 27 points.</abstract>
<identifier type="citekey">lankford-etal-2021-machine</identifier>
<location>
<url>https://aclanthology.org/2021.mtsummit-loresmt.15/</url>
</location>
<part>
<date>2021-08</date>
<extent unit="page">
<start>144</start>
<end>150</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Machine Translation in the Covid domain: an English-Irish case study for LoResMT 2021
%A Lankford, Seamus
%A Afli, Haithem
%A Way, Andy
%Y Ortega, John
%Y Ojha, Atul Kr.
%Y Kann, Katharina
%Y Liu, Chao-Hong
%S Proceedings of the 4th Workshop on Technologies for MT of Low Resource Languages (LoResMT2021)
%D 2021
%8 August
%I Association for Machine Translation in the Americas
%C Virtual
%F lankford-etal-2021-machine
%X Translation models for the specific domain of translating Covid data from English to Irish were developed for the LoResMT 2021 shared task. Domain adaptation techniques, using a Covid-adapted generic 55k corpus from the Directorate General of Translation, were applied. Fine-tuning, mixed fine-tuning and combined dataset approaches were compared with models trained on an extended in-domain dataset. As part of this study, an English-Irish dataset of Covid related data, from the Health and Education domains, was developed. The highest performing model used a Transformer architecture trained with an extended in-domain Covid dataset. In the context of this study, we have demonstrated that extending an 8k in-domain baseline dataset by just 5k lines improved the BLEU score by 27 points.
%U https://aclanthology.org/2021.mtsummit-loresmt.15/
%P 144-150
Markdown (Informal)
[Machine Translation in the Covid domain: an English-Irish case study for LoResMT 2021](https://aclanthology.org/2021.mtsummit-loresmt.15/) (Lankford et al., LoResMT 2021)
ACL