% Conference paper from the LREC'04 proceedings (Lisbon, May 2004).
% Values are brace-delimited; the month uses the bare three-letter macro.
@inproceedings{nevado-etal-2004-translation,
  author    = {Nevado, Francisco and
               Casacuberta, Francisco and
               Landa, Josu},
  title     = {Translation Memories Enrichment by Statistical Bilingual Segmentation},
  editor    = {Lino, Maria Teresa and
               Xavier, Maria Francisca and
               Ferreira, F{\'a}tima and
               Costa, Rute and
               Silva, Raquel},
  booktitle = {Proceedings of the Fourth International Conference on Language Resources and Evaluation ({LREC}{'}04)},
  year      = {2004},
  month     = may,
  address   = {Lisbon, Portugal},
  publisher = {European Language Resources Association (ELRA)},
  url       = {http://www.lrec-conf.org/proceedings/lrec2004/pdf/443.pdf},
  abstract  = {A majority of Machine Aided Translation systems are based on comparisons between a source sentence and reference sentences stored in Translation Memories (TMs). The translation search is done by looking for sentences in a database which are similar to the source sentence. TMs have two basic limitations: the dependency on the repetition of complete sentences and the high cost of building a TM. As human translators do not only remember sentences from their preceding translations, but they also decompose the sentence to be translated and work with smaller units, it would be desirable to enrich the TM database with smaller translation units. This enrichment should also be automatic in order not to increase the cost of building a TM. We propose the application of two automatic bilingual segmentation techniques based on statistical translation methods in order to create new, shorter bilingual segments to be included in a TM database. An evaluation of the two techniques is carried out for a bilingual Basque-Spanish task.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record, keyed by the same ID as its citekey identifier. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="nevado-etal-2004-translation">
    <!-- The paper itself: title, authors, issue date. -->
    <titleInfo>
      <title>Translation Memories Enrichment by Statistical Bilingual Segmentation</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Francisco</namePart>
      <namePart type="family">Nevado</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Francisco</namePart>
      <namePart type="family">Casacuberta</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Josu</namePart>
      <namePart type="family">Landa</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2004-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Fourth International Conference on Language Resources and Evaluation (LREC’04)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Maria</namePart>
        <namePart type="given">Teresa</namePart>
        <namePart type="family">Lino</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Maria</namePart>
        <namePart type="given">Francisca</namePart>
        <namePart type="family">Xavier</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Fátima</namePart>
        <namePart type="family">Ferreira</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Rute</namePart>
        <namePart type="family">Costa</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Raquel</namePart>
        <namePart type="family">Silva</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>European Language Resources Association (ELRA)</publisher>
        <place>
          <placeTerm type="text">Lisbon, Portugal</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>A majority of Machine Aided Translation systems are based on comparisons between a source sentence and reference sentences stored in Translation Memories (TMs). The translation search is done by looking for sentences in a database which are similar to the source sentence. TMs have two basic limitations: the dependency on the repetition of complete sentences and the high cost of building a TM. As human translators do not only remember sentences from their preceding translations, but they also decompose the sentence to be translated and work with smaller units, it would be desirable to enrich the TM database with smaller translation units. This enrichment should also be automatic in order not to increase the cost of building a TM. We propose the application of two automatic bilingual segmentation techniques based on statistical translation methods in order to create new, shorter bilingual segments to be included in a TM database. An evaluation of the two techniques is carried out for a bilingual Basque-Spanish task.</abstract>
    <!-- Citation key and where to fetch the full text. -->
    <identifier type="citekey">nevado-etal-2004-translation</identifier>
    <location>
      <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/443.pdf</url>
    </location>
    <part>
      <date>2004-05</date>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Translation Memories Enrichment by Statistical Bilingual Segmentation
%A Nevado, Francisco
%A Casacuberta, Francisco
%A Landa, Josu
%Y Lino, Maria Teresa
%Y Xavier, Maria Francisca
%Y Ferreira, Fátima
%Y Costa, Rute
%Y Silva, Raquel
%S Proceedings of the Fourth International Conference on Language Resources and Evaluation (LREC’04)
%D 2004
%8 May
%I European Language Resources Association (ELRA)
%C Lisbon, Portugal
%F nevado-etal-2004-translation
%X A majority of Machine Aided Translation systems are based on comparisons between a source sentence and reference sentences stored in Translation Memories (TMs). The translation search is done by looking for sentences in a database which are similar to the source sentence. TMs have two basic limitations: the dependency on the repetition of complete sentences and the high cost of building a TM. As human translators do not only remember sentences from their preceding translations, but they also decompose the sentence to be translated and work with smaller units, it would be desirable to enrich the TM database with smaller translation units. This enrichment should also be automatic in order not to increase the cost of building a TM. We propose the application of two automatic bilingual segmentation techniques based on statistical translation methods in order to create new, shorter bilingual segments to be included in a TM database. An evaluation of the two techniques is carried out for a bilingual Basque-Spanish task.
%U http://www.lrec-conf.org/proceedings/lrec2004/pdf/443.pdf
Markdown (Informal)
[Translation Memories Enrichment by Statistical Bilingual Segmentation](http://www.lrec-conf.org/proceedings/lrec2004/pdf/443.pdf) (Nevado et al., LREC 2004)
ACL