@inproceedings{costa-jussa-2017-catalan,
title = "Why {C}atalan-{S}panish Neural Machine Translation? Analysis, comparison and combination with standard Rule and Phrase-based technologies",
author = "Costa-juss{\`a}, Marta R.",
editor = {Nakov, Preslav and
Zampieri, Marcos and
Ljube{\v{s}}i{\'c}, Nikola and
Tiedemann, J{\"o}rg and
Malmasi, Shevin and
Ali, Ahmed},
booktitle = "Proceedings of the Fourth Workshop on {NLP} for Similar Languages, Varieties and Dialects ({V}ar{D}ial)",
month = apr,
year = "2017",
address = "Valencia, Spain",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W17-1207",
doi = "10.18653/v1/W17-1207",
pages = "55--62",
abstract = "Catalan and Spanish are two related languages given that both derive from Latin. They share similarities in several linguistic levels including morphology, syntax and semantics. This makes them particularly interesting for the MT task. Given the recent appearance and popularity of neural MT, this paper analyzes the performance of this new approach compared to the well-established rule-based and phrase-based MT systems. Experiments are reported on a large database of 180 million words. Results, in terms of standard automatic measures, show that neural MT clearly outperforms the rule-based and phrase-based MT system on in-domain test set, but it is worst in the out-of-domain test set. A naive system combination specially works for the latter. In-domain manual analysis shows that neural MT tends to improve both adequacy and fluency, for example, by being able to generate more natural translations instead of literal ones, choosing to the adequate target word when the source word has several translations and improving gender agreement. However, out-of-domain manual analysis shows how neural MT is more affected by unknown words or contexts.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="costa-jussa-2017-catalan">
<titleInfo>
<title>Why Catalan-Spanish Neural Machine Translation? Analysis, comparison and combination with standard Rule and Phrase-based technologies</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marta</namePart>
<namePart type="given">R</namePart>
<namePart type="family">Costa-jussà</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fourth Workshop on NLP for Similar Languages, Varieties and Dialects (VarDial)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Preslav</namePart>
<namePart type="family">Nakov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcos</namePart>
<namePart type="family">Zampieri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nikola</namePart>
<namePart type="family">Ljubešić</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jörg</namePart>
<namePart type="family">Tiedemann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shevin</namePart>
<namePart type="family">Malmasi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ahmed</namePart>
<namePart type="family">Ali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Valencia, Spain</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Catalan and Spanish are two related languages given that both derive from Latin. They share similarities in several linguistic levels including morphology, syntax and semantics. This makes them particularly interesting for the MT task. Given the recent appearance and popularity of neural MT, this paper analyzes the performance of this new approach compared to the well-established rule-based and phrase-based MT systems. Experiments are reported on a large database of 180 million words. Results, in terms of standard automatic measures, show that neural MT clearly outperforms the rule-based and phrase-based MT system on in-domain test set, but it is worst in the out-of-domain test set. A naive system combination specially works for the latter. In-domain manual analysis shows that neural MT tends to improve both adequacy and fluency, for example, by being able to generate more natural translations instead of literal ones, choosing to the adequate target word when the source word has several translations and improving gender agreement. However, out-of-domain manual analysis shows how neural MT is more affected by unknown words or contexts.</abstract>
<identifier type="citekey">costa-jussa-2017-catalan</identifier>
<identifier type="doi">10.18653/v1/W17-1207</identifier>
<location>
<url>https://aclanthology.org/W17-1207</url>
</location>
<part>
<date>2017-04</date>
<extent unit="page">
<start>55</start>
<end>62</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Why Catalan-Spanish Neural Machine Translation? Analysis, comparison and combination with standard Rule and Phrase-based technologies
%A Costa-jussà, Marta R.
%Y Nakov, Preslav
%Y Zampieri, Marcos
%Y Ljubešić, Nikola
%Y Tiedemann, Jörg
%Y Malmasi, Shevin
%Y Ali, Ahmed
%S Proceedings of the Fourth Workshop on NLP for Similar Languages, Varieties and Dialects (VarDial)
%D 2017
%8 April
%I Association for Computational Linguistics
%C Valencia, Spain
%F costa-jussa-2017-catalan
%X Catalan and Spanish are two related languages given that both derive from Latin. They share similarities in several linguistic levels including morphology, syntax and semantics. This makes them particularly interesting for the MT task. Given the recent appearance and popularity of neural MT, this paper analyzes the performance of this new approach compared to the well-established rule-based and phrase-based MT systems. Experiments are reported on a large database of 180 million words. Results, in terms of standard automatic measures, show that neural MT clearly outperforms the rule-based and phrase-based MT system on in-domain test set, but it is worst in the out-of-domain test set. A naive system combination specially works for the latter. In-domain manual analysis shows that neural MT tends to improve both adequacy and fluency, for example, by being able to generate more natural translations instead of literal ones, choosing to the adequate target word when the source word has several translations and improving gender agreement. However, out-of-domain manual analysis shows how neural MT is more affected by unknown words or contexts.
%R 10.18653/v1/W17-1207
%U https://aclanthology.org/W17-1207
%U https://doi.org/10.18653/v1/W17-1207
%P 55-62
Markdown (Informal)
[Why Catalan-Spanish Neural Machine Translation? Analysis, comparison and combination with standard Rule and Phrase-based technologies](https://aclanthology.org/W17-1207) (Costa-jussà, VarDial 2017)
ACL