@inproceedings{ma-cieri-2006-corpus,
title = "Corpus Support for Machine Translation at {LDC}",
author = "Ma, Xiaoyi and
Cieri, Christopher",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Gangemi, Aldo and
Maegaard, Bente and
Mariani, Joseph and
Odijk, Jan and
Tapias, Daniel",
booktitle = "Proceedings of the Fifth International Conference on Language Resources and Evaluation ({LREC}{'}06)",
month = may,
year = "2006",
address = "Genoa, Italy",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2006/pdf/754_pdf.pdf",
abstract = "This paper describes LDC's efforts in collecting, creating and processing different types of linguistic data, including lexicons, parallel text, multiple translation corpora, and human assessment of translation quality, to support the research and development in Machine Translation. Through a combination of different procedures and core technologies, the LDC was able to create very large, high quality, and cost-efficient corpora, which have contributed significantly to recent advances in Machine Translation. Multiple translation corpora and human assessment together facilitate, validate and improve automatic evaluation metrics, which are vital to the development of MT systems. The Bilingual Internet Text Search (BITS) and Champollion sentence aligner enable the finding and processing of large quantities of parallel text. All specifications and tools used by LDC and described in the paper are or will be available to the general public.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ma-cieri-2006-corpus">
<titleInfo>
<title>Corpus Support for Machine Translation at LDC</title>
</titleInfo>
<name type="personal">
<namePart type="given">Xiaoyi</namePart>
<namePart type="family">Ma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christopher</namePart>
<namePart type="family">Cieri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2006-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth International Conference on Language Resources and Evaluation (LREC’06)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aldo</namePart>
<namePart type="family">Gangemi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Tapias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Genoa, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper describes LDC’s efforts in collecting, creating and processing different types of linguistic data, including lexicons, parallel text, multiple translation corpora, and human assessment of translation quality, to support the research and development in Machine Translation. Through a combination of different procedures and core technologies, the LDC was able to create very large, high quality, and cost-efficient corpora, which have contributed significantly to recent advances in Machine Translation. Multiple translation corpora and human assessment together facilitate, validate and improve automatic evaluation metrics, which are vital to the development of MT systems. The Bilingual Internet Text Search (BITS) and Champollion sentence aligner enable the finding and processing of large quantities of parallel text. All specifications and tools used by LDC and described in the paper are or will be available to the general public.</abstract>
<identifier type="citekey">ma-cieri-2006-corpus</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/754_pdf.pdf</url>
</location>
<part>
<date>2006-05</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Corpus Support for Machine Translation at LDC
%A Ma, Xiaoyi
%A Cieri, Christopher
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Gangemi, Aldo
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Odijk, Jan
%Y Tapias, Daniel
%S Proceedings of the Fifth International Conference on Language Resources and Evaluation (LREC’06)
%D 2006
%8 May
%I European Language Resources Association (ELRA)
%C Genoa, Italy
%F ma-cieri-2006-corpus
%X This paper describes LDC’s efforts in collecting, creating and processing different types of linguistic data, including lexicons, parallel text, multiple translation corpora, and human assessment of translation quality, to support the research and development in Machine Translation. Through a combination of different procedures and core technologies, the LDC was able to create very large, high quality, and cost-efficient corpora, which have contributed significantly to recent advances in Machine Translation. Multiple translation corpora and human assessment together facilitate, validate and improve automatic evaluation metrics, which are vital to the development of MT systems. The Bilingual Internet Text Search (BITS) and Champollion sentence aligner enable the finding and processing of large quantities of parallel text. All specifications and tools used by LDC and described in the paper are or will be available to the general public.
%U http://www.lrec-conf.org/proceedings/lrec2006/pdf/754_pdf.pdf
Markdown (Informal)
[Corpus Support for Machine Translation at LDC](http://www.lrec-conf.org/proceedings/lrec2006/pdf/754_pdf.pdf) (Ma & Cieri, LREC 2006)
ACL
- Xiaoyi Ma and Christopher Cieri. 2006. Corpus Support for Machine Translation at LDC. In Proceedings of the Fifth International Conference on Language Resources and Evaluation (LREC’06), Genoa, Italy. European Language Resources Association (ELRA).