% Conference paper record for the LREC 2008 proceedings.
% Whole words (not single letters) are brace-protected in the title so that
% sentence-casing bibliography styles keep proper nouns capitalised.
@inproceedings{ma-etal-2008-selection,
  title     = {Selection of {Japanese}-{English} Equivalents by Integrating High-quality Corpora and Huge Amounts of {Web} Data},
  author    = {Ma, Qing and
               Nakao, Koichi and
               Murata, Masaki and
               Isahara, Hitoshi},
  editor    = {Calzolari, Nicoletta and
               Choukri, Khalid and
               Maegaard, Bente and
               Mariani, Joseph and
               Odijk, Jan and
               Piperidis, Stelios and
               Tapias, Daniel},
  booktitle = {Proceedings of the Sixth International Conference on Language Resources and Evaluation ({LREC}'08)},
  month     = may,
  year      = {2008},
  address   = {Marrakech, Morocco},
  publisher = {European Language Resources Association (ELRA)},
  url       = {http://www.lrec-conf.org/proceedings/lrec2008/pdf/107_paper.pdf},
  abstract  = {As a first step to developing systems that enable non-native speakers to output near-perfect English sentences for given mixed English-Japanese sentences, we propose new approaches for selecting English equivalents by using the number of hits for various contexts in large English corpora. As the large English corpora, we not only used the huge amounts of Web data but also the manually compiled large, high-quality English corpora. Using high-quality corpora enables us to accurately select equivalents, and using huge amounts of Web data enables us to resolve the problem of the shortage of hits that normally occurs when using only high-quality corpora. The types and lengths of contexts used to select equivalents are variable and optimally determined according to the number of hits in the corpora, so that performance can be further refined. Computer experiments showed that the precision of our methods was much higher than that of the existing methods for equivalent selection.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record for the paper below. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="ma-etal-2008-selection">
    <!-- Title of the paper itself. -->
    <titleInfo>
      <title>Selection of Japanese-English Equivalents by Integrating High-quality Corpora and Huge Amounts of Web Data</title>
    </titleInfo>
    <!-- Authors, in publication order. -->
    <name type="personal">
      <namePart type="given">Qing</namePart>
      <namePart type="family">Ma</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Koichi</namePart>
      <namePart type="family">Nakao</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Masaki</namePart>
      <namePart type="family">Murata</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Hitoshi</namePart>
      <namePart type="family">Isahara</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2008-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors and publisher. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC’08)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Nicoletta</namePart>
        <namePart type="family">Calzolari</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Khalid</namePart>
        <namePart type="family">Choukri</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Bente</namePart>
        <namePart type="family">Maegaard</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Joseph</namePart>
        <namePart type="family">Mariani</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jan</namePart>
        <namePart type="family">Odijk</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Stelios</namePart>
        <namePart type="family">Piperidis</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Daniel</namePart>
        <namePart type="family">Tapias</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>European Language Resources Association (ELRA)</publisher>
        <place>
          <placeTerm type="text">Marrakech, Morocco</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>As a first step to developing systems that enable non-native speakers to output near-perfect English sentences for given mixed English-Japanese sentences, we propose new approaches for selecting English equivalents by using the number of hits for various contexts in large English corpora. As the large English corpora, we not only used the huge amounts of Web data but also the manually compiled large, high-quality English corpora. Using high-quality corpora enables us to accurately select equivalents, and using huge amounts of Web data enables us to resolve the problem of the shortage of hits that normally occurs when using only high-quality corpora. The types and lengths of contexts used to select equivalents are variable and optimally determined according to the number of hits in the corpora, so that performance can be further refined. Computer experiments showed that the precision of our methods was much higher than that of the existing methods for equivalent selection.</abstract>
    <!-- Citation key shared with the other export formats of this record. -->
    <identifier type="citekey">ma-etal-2008-selection</identifier>
    <location>
      <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/107_paper.pdf</url>
    </location>
    <part>
      <date>2008-05</date>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Selection of Japanese-English Equivalents by Integrating High-quality Corpora and Huge Amounts of Web Data
%A Ma, Qing
%A Nakao, Koichi
%A Murata, Masaki
%A Isahara, Hitoshi
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Odijk, Jan
%Y Piperidis, Stelios
%Y Tapias, Daniel
%S Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC’08)
%D 2008
%8 May
%I European Language Resources Association (ELRA)
%C Marrakech, Morocco
%F ma-etal-2008-selection
%X As a first step to developing systems that enable non-native speakers to output near-perfect English sentences for given mixed English-Japanese sentences, we propose new approaches for selecting English equivalents by using the number of hits for various contexts in large English corpora. As the large English corpora, we not only used the huge amounts of Web data but also the manually compiled large, high-quality English corpora. Using high-quality corpora enables us to accurately select equivalents, and using huge amounts of Web data enables us to resolve the problem of the shortage of hits that normally occurs when using only high-quality corpora. The types and lengths of contexts used to select equivalents are variable and optimally determined according to the number of hits in the corpora, so that performance can be further refined. Computer experiments showed that the precision of our methods was much higher than that of the existing methods for equivalent selection.
%U http://www.lrec-conf.org/proceedings/lrec2008/pdf/107_paper.pdf
Markdown (Informal)
[Selection of Japanese-English Equivalents by Integrating High-quality Corpora and Huge Amounts of Web Data](http://www.lrec-conf.org/proceedings/lrec2008/pdf/107_paper.pdf) (Ma et al., LREC 2008)
ACL