@inproceedings{muller-2017-treatment,
title = "Treatment of Markup in Statistical Machine Translation",
author = {M{\"u}ller, Mathias},
editor = {Webber, Bonnie and
Popescu-Belis, Andrei and
Tiedemann, J{\"o}rg},
booktitle = "Proceedings of the Third Workshop on Discourse in Machine Translation",
month = sep,
year = "2017",
address = "Copenhagen, Denmark",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W17-4804",
doi = "10.18653/v1/W17-4804",
pages = "36--46",
abstract = "We present work on handling XML markup in Statistical Machine Translation (SMT). The methods we propose can be used to effectively preserve markup (for instance inline formatting or structure) and to place markup correctly in a machine-translated segment. We evaluate our approaches with parallel data that naturally contains markup or where markup was inserted to create synthetic examples. In our experiments, hybrid reinsertion has proven the most accurate method to handle markup, while alignment masking and alignment reinsertion should be regarded as viable alternatives. We provide implementations of all the methods described and they are freely available as an open-source framework.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="muller-2017-treatment">
<titleInfo>
<title>Treatment of Markup in Statistical Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mathias</namePart>
<namePart type="family">Müller</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Third Workshop on Discourse in Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bonnie</namePart>
<namePart type="family">Webber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andrei</namePart>
<namePart type="family">Popescu-Belis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jörg</namePart>
<namePart type="family">Tiedemann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Copenhagen, Denmark</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present work on handling XML markup in Statistical Machine Translation (SMT). The methods we propose can be used to effectively preserve markup (for instance inline formatting or structure) and to place markup correctly in a machine-translated segment. We evaluate our approaches with parallel data that naturally contains markup or where markup was inserted to create synthetic examples. In our experiments, hybrid reinsertion has proven the most accurate method to handle markup, while alignment masking and alignment reinsertion should be regarded as viable alternatives. We provide implementations of all the methods described and they are freely available as an open-source framework.</abstract>
<identifier type="citekey">muller-2017-treatment</identifier>
<identifier type="doi">10.18653/v1/W17-4804</identifier>
<location>
<url>https://aclanthology.org/W17-4804</url>
</location>
<part>
<date>2017-09</date>
<extent unit="page">
<start>36</start>
<end>46</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Treatment of Markup in Statistical Machine Translation
%A Müller, Mathias
%Y Webber, Bonnie
%Y Popescu-Belis, Andrei
%Y Tiedemann, Jörg
%S Proceedings of the Third Workshop on Discourse in Machine Translation
%D 2017
%8 September
%I Association for Computational Linguistics
%C Copenhagen, Denmark
%F muller-2017-treatment
%X We present work on handling XML markup in Statistical Machine Translation (SMT). The methods we propose can be used to effectively preserve markup (for instance inline formatting or structure) and to place markup correctly in a machine-translated segment. We evaluate our approaches with parallel data that naturally contains markup or where markup was inserted to create synthetic examples. In our experiments, hybrid reinsertion has proven the most accurate method to handle markup, while alignment masking and alignment reinsertion should be regarded as viable alternatives. We provide implementations of all the methods described and they are freely available as an open-source framework.
%R 10.18653/v1/W17-4804
%U https://aclanthology.org/W17-4804
%U https://doi.org/10.18653/v1/W17-4804
%P 36-46
Markdown (Informal)
[Treatment of Markup in Statistical Machine Translation](https://aclanthology.org/W17-4804) (Müller, DiscoMT 2017)
ACL