@inproceedings{rojas-aikawa-2006-predicting,
title = "Predicting {MT} Quality as a Function of the Source Language",
author = "Rojas, David M. and
Aikawa, Takako",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Gangemi, Aldo and
Maegaard, Bente and
Mariani, Joseph and
Odijk, Jan and
Tapias, Daniel",
booktitle = "Proceedings of the Fifth International Conference on Language Resources and Evaluation ({LREC}{'}06)",
month = may,
year = "2006",
address = "Genoa, Italy",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2006/pdf/472_pdf.pdf",
abstract = "This paper describes one phase of a large-scale machine translation (MT) quality assurance project. We explore a novel approach to discriminating MT-unsuitable source sentences by predicting the expected quality of the output. The resources required include a set of source/MT sentence pairs, human judgments on the output, a source parser, and an MT system. We extract a number of syntactic, semantic, and lexical features from the source sentences only and train a classifier that we call the Syntactic, Semantic, and Lexical Model (SSLM) (cf. Gamon et al., 2005; Liu {\&} Gildea, 2005; Rajman {\&} Hartley, 2001). Despite the simplicity of the approach, SSLM scores correlate with human judgments and can help determine whether sentences are suitable or unsuitable for translation by our MT system. SSLM also provides information about which source features impact MT quality, connecting this work with the field of controlled language (CL) (cf. Reuther, 2003; Nyberg {\&} Mitamura, 1996). With a focus on the input side of MT, SSLM differs greatly from evaluation approaches such as BLEU (Papineni et al., 2002), NIST (Doddington, 2002) and METEOR (Banerjee {\&} Lavie, 2005) in that these other systems compare MT output with reference sentences for evaluation and do not provide feedback regarding potentially problematic source material. Our method bridges the research areas of CL and MT evaluation by addressing the importance of providing MT-suitable English input to enhance output quality.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="rojas-aikawa-2006-predicting">
<titleInfo>
<title>Predicting MT Quality as a Function of the Source Language</title>
</titleInfo>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Rojas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Takako</namePart>
<namePart type="family">Aikawa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2006-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth International Conference on Language Resources and Evaluation (LREC’06)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aldo</namePart>
<namePart type="family">Gangemi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Tapias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Genoa, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper describes one phase of a large-scale machine translation (MT) quality assurance project. We explore a novel approach to discriminating MT-unsuitable source sentences by predicting the expected quality of the output. The resources required include a set of source/MT sentence pairs, human judgments on the output, a source parser, and an MT system. We extract a number of syntactic, semantic, and lexical features from the source sentences only and train a classifier that we call the Syntactic, Semantic, and Lexical Model (SSLM) (cf. Gamon et al., 2005; Liu & Gildea, 2005; Rajman & Hartley, 2001). Despite the simplicity of the approach, SSLM scores correlate with human judgments and can help determine whether sentences are suitable or unsuitable for translation by our MT system. SSLM also provides information about which source features impact MT quality, connecting this work with the field of controlled language (CL) (cf. Reuther, 2003; Nyberg & Mitamura, 1996). With a focus on the input side of MT, SSLM differs greatly from evaluation approaches such as BLEU (Papineni et al., 2002), NIST (Doddington, 2002) and METEOR (Banerjee & Lavie, 2005) in that these other systems compare MT output with reference sentences for evaluation and do not provide feedback regarding potentially problematic source material. Our method bridges the research areas of CL and MT evaluation by addressing the importance of providing MT-suitable English input to enhance output quality.</abstract>
<identifier type="citekey">rojas-aikawa-2006-predicting</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/472_pdf.pdf</url>
</location>
<part>
<date>2006-05</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Predicting MT Quality as a Function of the Source Language
%A Rojas, David M.
%A Aikawa, Takako
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Gangemi, Aldo
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Odijk, Jan
%Y Tapias, Daniel
%S Proceedings of the Fifth International Conference on Language Resources and Evaluation (LREC’06)
%D 2006
%8 May
%I European Language Resources Association (ELRA)
%C Genoa, Italy
%F rojas-aikawa-2006-predicting
%X This paper describes one phase of a large-scale machine translation (MT) quality assurance project. We explore a novel approach to discriminating MT-unsuitable source sentences by predicting the expected quality of the output. The resources required include a set of source/MT sentence pairs, human judgments on the output, a source parser, and an MT system. We extract a number of syntactic, semantic, and lexical features from the source sentences only and train a classifier that we call the Syntactic, Semantic, and Lexical Model (SSLM) (cf. Gamon et al., 2005; Liu & Gildea, 2005; Rajman & Hartley, 2001). Despite the simplicity of the approach, SSLM scores correlate with human judgments and can help determine whether sentences are suitable or unsuitable for translation by our MT system. SSLM also provides information about which source features impact MT quality, connecting this work with the field of controlled language (CL) (cf. Reuther, 2003; Nyberg & Mitamura, 1996). With a focus on the input side of MT, SSLM differs greatly from evaluation approaches such as BLEU (Papineni et al., 2002), NIST (Doddington, 2002) and METEOR (Banerjee & Lavie, 2005) in that these other systems compare MT output with reference sentences for evaluation and do not provide feedback regarding potentially problematic source material. Our method bridges the research areas of CL and MT evaluation by addressing the importance of providing MT-suitable English input to enhance output quality.
%U http://www.lrec-conf.org/proceedings/lrec2006/pdf/472_pdf.pdf
Markdown (Informal)
[Predicting MT Quality as a Function of the Source Language](http://www.lrec-conf.org/proceedings/lrec2006/pdf/472_pdf.pdf) (Rojas & Aikawa, LREC 2006)
ACL