@inproceedings{popovic-poncelas-2020-neural,
title = "Neural Machine Translation between similar {S}outh-{S}lavic languages",
author = "Popovi{\'c}, Maja and
Poncelas, Alberto",
booktitle = "Proceedings of the Fifth Conference on Machine Translation",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.wmt-1.51",
pages = "430--436",
abstract = "This paper describes the ADAPT-DCU machine translation systems built for the WMT 2020 shared task on Similar Language Translation. We explored several set-ups for NMT for Croatian{--}Slovenian and Serbian{--}Slovenian language pairs in both translation directions. Our experiments focus on different amounts and types of training data: we first apply basic filtering on the OpenSubtitles training corpora, then we perform additional cleaning of remaining misaligned segments based on character n-gram matching. Finally, we make use of additional monolingual data by creating synthetic parallel data through back-translation. Automatic evaluation shows that multilingual systems with joint Serbian and Croatian data are better than bilingual, as well as that character-based cleaning leads to improved scores while using less data. The results also confirm once more that adding back-translated data further improves the performance, especially when the synthetic data is similar to the desired domain of the development and test set. This, however, might come at a price of prolonged training time, especially for multitarget systems.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="popovic-poncelas-2020-neural">
<titleInfo>
<title>Neural Machine Translation between similar South-Slavic languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maja</namePart>
<namePart type="family">Popović</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alberto</namePart>
<namePart type="family">Poncelas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth Conference on Machine Translation</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper describes the ADAPT-DCU machine translation systems built for the WMT 2020 shared task on Similar Language Translation. We explored several set-ups for NMT for Croatian–Slovenian and Serbian–Slovenian language pairs in both translation directions. Our experiments focus on different amounts and types of training data: we first apply basic filtering on the OpenSubtitles training corpora, then we perform additional cleaning of remaining misaligned segments based on character n-gram matching. Finally, we make use of additional monolingual data by creating synthetic parallel data through back-translation. Automatic evaluation shows that multilingual systems with joint Serbian and Croatian data are better than bilingual, as well as that character-based cleaning leads to improved scores while using less data. The results also confirm once more that adding back-translated data further improves the performance, especially when the synthetic data is similar to the desired domain of the development and test set. This, however, might come at a price of prolonged training time, especially for multitarget systems.</abstract>
<identifier type="citekey">popovic-poncelas-2020-neural</identifier>
<location>
<url>https://aclanthology.org/2020.wmt-1.51</url>
</location>
<part>
<date>2020-11</date>
<extent unit="page">
<start>430</start>
<end>436</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Neural Machine Translation between similar South-Slavic languages
%A Popović, Maja
%A Poncelas, Alberto
%S Proceedings of the Fifth Conference on Machine Translation
%D 2020
%8 November
%I Association for Computational Linguistics
%C Online
%F popovic-poncelas-2020-neural
%X This paper describes the ADAPT-DCU machine translation systems built for the WMT 2020 shared task on Similar Language Translation. We explored several set-ups for NMT for Croatian–Slovenian and Serbian–Slovenian language pairs in both translation directions. Our experiments focus on different amounts and types of training data: we first apply basic filtering on the OpenSubtitles training corpora, then we perform additional cleaning of remaining misaligned segments based on character n-gram matching. Finally, we make use of additional monolingual data by creating synthetic parallel data through back-translation. Automatic evaluation shows that multilingual systems with joint Serbian and Croatian data are better than bilingual, as well as that character-based cleaning leads to improved scores while using less data. The results also confirm once more that adding back-translated data further improves the performance, especially when the synthetic data is similar to the desired domain of the development and test set. This, however, might come at a price of prolonged training time, especially for multitarget systems.
%U https://aclanthology.org/2020.wmt-1.51
%P 430-436
Markdown (Informal)
[Neural Machine Translation between similar South-Slavic languages](https://aclanthology.org/2020.wmt-1.51) (Popović & Poncelas, WMT 2020)
ACL