@article{deng-xue-2017-translation,
title = "Translation Divergences in {C}hinese{--}{E}nglish Machine Translation: An Empirical Investigation",
author = "Deng, Dun and
Xue, Nianwen",
journal = "Computational Linguistics",
volume = "43",
number = "3",
month = sep,
year = "2017",
address = "Cambridge, MA",
publisher = "MIT Press",
url = "https://aclanthology.org/J17-3002",
doi = "10.1162/COLI_a_00292",
pages = "521--565",
abstract = "In this article, we conduct an empirical investigation of translation divergences between Chinese and English relying on a parallel treebank. To do this, we first devise a hierarchical alignment scheme where Chinese and English parse trees are aligned in a way that eliminates conflicts and redundancies between word alignments and syntactic parses to prevent the generation of spurious translation divergences. Using this Hierarchically Aligned Chinese{--}English Parallel Treebank (HACEPT), we are able to semi-automatically identify and categorize the translation divergences between the two languages and quantify each type of translation divergence. Our results show that the translation divergences are much broader than described in previous studies that are largely based on anecdotal evidence and linguistic knowledge. The distribution of the translation divergences also shows that some high-profile translation divergences that motivate previous research are actually very rare in our data, whereas other translation divergences that have previously received little attention actually exist in large quantities. We also show that HACEPT allows the extraction of syntax-based translation rules, most of which are expressive enough to capture the translation divergences, and point out that the syntactic annotation in existing treebanks is not optimal for extracting such translation rules. We also discuss the implications of our study for attempts to bridge translation divergences by devising shared semantic representations across languages. Our quantitative results lend further support to the observation that although it is possible to bridge some translation divergences with semantic representations, other translation divergences are open-ended, thus building a semantic representation that captures all possible translation divergences may be impractical.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="deng-xue-2017-translation">
<titleInfo>
<title>Translation Divergences in Chinese–English Machine Translation: An Empirical Investigation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dun</namePart>
<namePart type="family">Deng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<genre authority="bibutilsgt">journal article</genre>
<relatedItem type="host">
<titleInfo>
<title>Computational Linguistics</title>
</titleInfo>
<originInfo>
<issuance>continuing</issuance>
<publisher>MIT Press</publisher>
<place>
<placeTerm type="text">Cambridge, MA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">periodical</genre>
<genre authority="bibutilsgt">academic journal</genre>
</relatedItem>
<abstract>In this article, we conduct an empirical investigation of translation divergences between Chinese and English relying on a parallel treebank. To do this, we first devise a hierarchical alignment scheme where Chinese and English parse trees are aligned in a way that eliminates conflicts and redundancies between word alignments and syntactic parses to prevent the generation of spurious translation divergences. Using this Hierarchically Aligned Chinese–English Parallel Treebank (HACEPT), we are able to semi-automatically identify and categorize the translation divergences between the two languages and quantify each type of translation divergence. Our results show that the translation divergences are much broader than described in previous studies that are largely based on anecdotal evidence and linguistic knowledge. The distribution of the translation divergences also shows that some high-profile translation divergences that motivate previous research are actually very rare in our data, whereas other translation divergences that have previously received little attention actually exist in large quantities. We also show that HACEPT allows the extraction of syntax-based translation rules, most of which are expressive enough to capture the translation divergences, and point out that the syntactic annotation in existing treebanks is not optimal for extracting such translation rules. We also discuss the implications of our study for attempts to bridge translation divergences by devising shared semantic representations across languages. Our quantitative results lend further support to the observation that although it is possible to bridge some translation divergences with semantic representations, other translation divergences are open-ended, thus building a semantic representation that captures all possible translation divergences may be impractical.</abstract>
<identifier type="citekey">deng-xue-2017-translation</identifier>
<identifier type="doi">10.1162/COLI_a_00292</identifier>
<location>
<url>https://aclanthology.org/J17-3002</url>
</location>
<part>
<date>2017-09</date>
<detail type="volume"><number>43</number></detail>
<detail type="issue"><number>3</number></detail>
<extent unit="page">
<start>521</start>
<end>565</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Journal Article
%T Translation Divergences in Chinese–English Machine Translation: An Empirical Investigation
%A Deng, Dun
%A Xue, Nianwen
%J Computational Linguistics
%D 2017
%8 September
%V 43
%N 3
%I MIT Press
%C Cambridge, MA
%F deng-xue-2017-translation
%X In this article, we conduct an empirical investigation of translation divergences between Chinese and English relying on a parallel treebank. To do this, we first devise a hierarchical alignment scheme where Chinese and English parse trees are aligned in a way that eliminates conflicts and redundancies between word alignments and syntactic parses to prevent the generation of spurious translation divergences. Using this Hierarchically Aligned Chinese–English Parallel Treebank (HACEPT), we are able to semi-automatically identify and categorize the translation divergences between the two languages and quantify each type of translation divergence. Our results show that the translation divergences are much broader than described in previous studies that are largely based on anecdotal evidence and linguistic knowledge. The distribution of the translation divergences also shows that some high-profile translation divergences that motivate previous research are actually very rare in our data, whereas other translation divergences that have previously received little attention actually exist in large quantities. We also show that HACEPT allows the extraction of syntax-based translation rules, most of which are expressive enough to capture the translation divergences, and point out that the syntactic annotation in existing treebanks is not optimal for extracting such translation rules. We also discuss the implications of our study for attempts to bridge translation divergences by devising shared semantic representations across languages. Our quantitative results lend further support to the observation that although it is possible to bridge some translation divergences with semantic representations, other translation divergences are open-ended, thus building a semantic representation that captures all possible translation divergences may be impractical.
%R 10.1162/COLI_a_00292
%U https://aclanthology.org/J17-3002
%U https://doi.org/10.1162/COLI_a_00292
%P 521-565
Markdown (Informal)
[Translation Divergences in Chinese–English Machine Translation: An Empirical Investigation](https://aclanthology.org/J17-3002) (Deng & Xue, CL 2017)
ACL