@inproceedings{q-etal-2011-deriving,
title = "Deriving translation units using small additional corpora",
author = "Henr{\'i}quez Q., Carlos A. and
Mari{\~n}o, Jos{\'e} B. and
Banchs, Rafael E.",
editor = "Forcada, Mikel L. and
Depraetere, Heidi and
Vandeghinste, Vincent",
booktitle = "Proceedings of the 15th Annual Conference of the European Association for Machine Translation",
month = may # " 30–31",
year = "2011",
address = "Leuven, Belgium",
publisher = "European Association for Machine Translation",
url = "https://aclanthology.org/2011.eamt-1.18/",
abstract = "We present a novel strategy to derive new translation units using an additional bilingual corpus and a previously trained SMT system. The units were used to adapt the SMT system. The derivation process can be applied when the additional corpus is very small compared with the original train corpus and it does not require to compute new word alignments using all corpora. The strategy is based in the Levenshtein Distance and its resulting path. We reported a statistically significant improvement, with a confidence level of 99{\%}, when adapting an Ngram-based Catalan-Spanish system using an additional corpus that represents less than 0.5{\%} of the original train corpus. The additional translation units were able to solve morphological and lexical errors and added previously unknown words to the vocabulary."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="q-etal-2011-deriving">
<titleInfo>
<title>Deriving translation units using small additional corpora</title>
</titleInfo>
<name type="personal">
<namePart type="given">Carlos</namePart>
<namePart type="given">A</namePart>
<namePart type="family">Henríquez Q.</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">José</namePart>
<namePart type="given">B</namePart>
<namePart type="family">Mariño</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rafael</namePart>
<namePart type="given">E</namePart>
<namePart type="family">Banchs</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2011-may 30–31</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 15th Annual Conference of the European Association for Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mikel</namePart>
<namePart type="given">L</namePart>
<namePart type="family">Forcada</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Heidi</namePart>
<namePart type="family">Depraetere</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vincent</namePart>
<namePart type="family">Vandeghinste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Association for Machine Translation</publisher>
<place>
<placeTerm type="text">Leuven, Belgium</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present a novel strategy to derive new translation units using an additional bilingual corpus and a previously trained SMT system. The units were used to adapt the SMT system. The derivation process can be applied when the additional corpus is very small compared with the original train corpus and it does not require to compute new word alignments using all corpora. The strategy is based in the Levenshtein Distance and its resulting path. We reported a statistically significant improvement, with a confidence level of 99%, when adapting an Ngram-based Catalan-Spanish system using an additional corpus that represents less than 0.5% of the original train corpus. The additional translation units were able to solve morphological and lexical errors and added previously unknown words to the vocabulary.</abstract>
<identifier type="citekey">q-etal-2011-deriving</identifier>
<location>
<url>https://aclanthology.org/2011.eamt-1.18/</url>
</location>
<part>
<date>2011-may 30–31</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Deriving translation units using small additional corpora
%A Henríquez Q., Carlos A.
%A Mariño, José B.
%A Banchs, Rafael E.
%Y Forcada, Mikel L.
%Y Depraetere, Heidi
%Y Vandeghinste, Vincent
%S Proceedings of the 15th Annual Conference of the European Association for Machine Translation
%D 2011
%8 may 30–31
%I European Association for Machine Translation
%C Leuven, Belgium
%F q-etal-2011-deriving
%X We present a novel strategy to derive new translation units using an additional bilingual corpus and a previously trained SMT system. The units were used to adapt the SMT system. The derivation process can be applied when the additional corpus is very small compared with the original train corpus and it does not require to compute new word alignments using all corpora. The strategy is based in the Levenshtein Distance and its resulting path. We reported a statistically significant improvement, with a confidence level of 99%, when adapting an Ngram-based Catalan-Spanish system using an additional corpus that represents less than 0.5% of the original train corpus. The additional translation units were able to solve morphological and lexical errors and added previously unknown words to the vocabulary.
%U https://aclanthology.org/2011.eamt-1.18/
Markdown (Informal)
[Deriving translation units using small additional corpora](https://aclanthology.org/2011.eamt-1.18/) (Henríquez Q. et al., EAMT 2011)
ACL
- Carlos A. Henríquez Q., José B. Mariño, and Rafael E. Banchs. 2011. Deriving translation units using small additional corpora. In Proceedings of the 15th Annual Conference of the European Association for Machine Translation, Leuven, Belgium. European Association for Machine Translation.