@inproceedings{yamaguchi-2014-building,
title = "Building a Database of {J}apanese Adjective Examples from Special Purpose Web Corpora",
author = "Yamaguchi, Masaya",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Declerck, Thierry and
Loftsson, Hrafn and
Maegaard, Bente and
Mariani, Joseph and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Ninth International Conference on Language Resources and Evaluation ({LREC}'14)",
month = may,
year = "2014",
address = "Reykjavik, Iceland",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2014/pdf/1075_Paper.pdf",
pages = "3684--3688",
abstract = "It is often difficult to collect many examples for low-frequency words from a single general purpose corpus. In this paper, I present a method of building a database of Japanese adjective examples from special purpose Web corpora (SPW corpora) and investigates the characteristics of examples in the database by comparison with examples that are collected from a general purpose Web corpus (GPW corpus). My proposed method construct a SPW corpus for each adjective considering to collect examples that have the following features: (i) non-bias, (ii) the distribution of examples extracted from every SPW corpus bears much similarity to that of examples extracted from a GPW corpus. The results of experiments shows the following: (i) my proposed method can collect many examples rapidly. The number of examples extracted from SPW corpora is more than 8.0 times (median value) greater than that from the GPW corpus. (ii) the distributions of co-occurrence words for adjectives in the database are similar to those taken from the GPW corpus.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="yamaguchi-2014-building">
<titleInfo>
<title>Building a Database of Japanese Adjective Examples from Special Purpose Web Corpora</title>
</titleInfo>
<name type="personal">
<namePart type="given">Masaya</namePart>
<namePart type="family">Yamaguchi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2014-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC’14)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hrafn</namePart>
<namePart type="family">Loftsson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Reykjavik, Iceland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>It is often difficult to collect many examples for low-frequency words from a single general purpose corpus. In this paper, I present a method of building a database of Japanese adjective examples from special purpose Web corpora (SPW corpora) and investigates the characteristics of examples in the database by comparison with examples that are collected from a general purpose Web corpus (GPW corpus). My proposed method construct a SPW corpus for each adjective considering to collect examples that have the following features: (i) non-bias, (ii) the distribution of examples extracted from every SPW corpus bears much similarity to that of examples extracted from a GPW corpus. The results of experiments shows the following: (i) my proposed method can collect many examples rapidly. The number of examples extracted from SPW corpora is more than 8.0 times (median value) greater than that from the GPW corpus. (ii) the distributions of co-occurrence words for adjectives in the database are similar to those taken from the GPW corpus.</abstract>
<identifier type="citekey">yamaguchi-2014-building</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/1075_Paper.pdf</url>
</location>
<part>
<date>2014-05</date>
<extent unit="page">
<start>3684</start>
<end>3688</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Building a Database of Japanese Adjective Examples from Special Purpose Web Corpora
%A Yamaguchi, Masaya
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Declerck, Thierry
%Y Loftsson, Hrafn
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC’14)
%D 2014
%8 May
%I European Language Resources Association (ELRA)
%C Reykjavik, Iceland
%F yamaguchi-2014-building
%X It is often difficult to collect many examples for low-frequency words from a single general purpose corpus. In this paper, I present a method of building a database of Japanese adjective examples from special purpose Web corpora (SPW corpora) and investigates the characteristics of examples in the database by comparison with examples that are collected from a general purpose Web corpus (GPW corpus). My proposed method construct a SPW corpus for each adjective considering to collect examples that have the following features: (i) non-bias, (ii) the distribution of examples extracted from every SPW corpus bears much similarity to that of examples extracted from a GPW corpus. The results of experiments shows the following: (i) my proposed method can collect many examples rapidly. The number of examples extracted from SPW corpora is more than 8.0 times (median value) greater than that from the GPW corpus. (ii) the distributions of co-occurrence words for adjectives in the database are similar to those taken from the GPW corpus.
%U http://www.lrec-conf.org/proceedings/lrec2014/pdf/1075_Paper.pdf
%P 3684-3688
Markdown (Informal)
[Building a Database of Japanese Adjective Examples from Special Purpose Web Corpora](http://www.lrec-conf.org/proceedings/lrec2014/pdf/1075_Paper.pdf) (Yamaguchi, LREC 2014)
ACL