@inproceedings{sato-2012-dictionary,
title = "Dictionary Look-up with Katakana Variant Recognition",
author = "Sato, Satoshi",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Declerck, Thierry and
Do{\u{g}}an, Mehmet U{\u{g}}ur and
Maegaard, Bente and
Mariani, Joseph and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Eighth International Conference on Language Resources and Evaluation ({LREC}'12)",
month = may,
year = "2012",
address = "Istanbul, Turkey",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2012/pdf/282_Paper.pdf",
pages = "249--255",
abstract = "The Japanese language has rich variety and quantity of word variant. Since 1980s, it has been recognized that this richness becomes an obstacle against natural language processing. A complete solution, however, has not been presented yet. This paper proposes a method to recognize Katakana variants―a major type of word variant in Japanese―in the process of dictionary look-up. For a given set of variant generation rules, the method executes variant generation and entry retrieval simultaneously and efficiently. We have developed the seven-layered rule set (216 rules in total) according to the specification manual of UniDic-2.1.0 and other sources. An experiment shows that the spelling-variant generator with 102 rules in the first five layers is almost perfect. Another experiment shows that the form-variant generator with all 216 rules is powerful and 77.7{\%} of multiple spellings of Katakana loanwords are unnecessary (i.e., can be removed). This result means that the proposed method can drastically reduce the number of variants that we have to register into a dictionary in advance.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sato-2012-dictionary">
<titleInfo>
<title>Dictionary Look-up with Katakana Variant Recognition</title>
</titleInfo>
<name type="personal">
<namePart type="given">Satoshi</namePart>
<namePart type="family">Sato</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2012-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC’12)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mehmet</namePart>
<namePart type="given">Uğur</namePart>
<namePart type="family">Doğan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Istanbul, Turkey</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The Japanese language has rich variety and quantity of word variant. Since 1980s, it has been recognized that this richness becomes an obstacle against natural language processing. A complete solution, however, has not been presented yet. This paper proposes a method to recognize Katakana variants―a major type of word variant in Japanese―in the process of dictionary look-up. For a given set of variant generation rules, the method executes variant generation and entry retrieval simultaneously and efficiently. We have developed the seven-layered rule set (216 rules in total) according to the specification manual of UniDic-2.1.0 and other sources. An experiment shows that the spelling-variant generator with 102 rules in the first five layers is almost perfect. Another experiment shows that the form-variant generator with all 216 rules is powerful and 77.7% of multiple spellings of Katakana loanwords are unnecessary (i.e., can be removed). This result means that the proposed method can drastically reduce the number of variants that we have to register into a dictionary in advance.</abstract>
<identifier type="citekey">sato-2012-dictionary</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/282_Paper.pdf</url>
</location>
<part>
<date>2012-05</date>
<extent unit="page">
<start>249</start>
<end>255</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Dictionary Look-up with Katakana Variant Recognition
%A Sato, Satoshi
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Declerck, Thierry
%Y Doğan, Mehmet Uğur
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC’12)
%D 2012
%8 May
%I European Language Resources Association (ELRA)
%C Istanbul, Turkey
%F sato-2012-dictionary
%X The Japanese language has rich variety and quantity of word variant. Since 1980s, it has been recognized that this richness becomes an obstacle against natural language processing. A complete solution, however, has not been presented yet. This paper proposes a method to recognize Katakana variants―a major type of word variant in Japanese―in the process of dictionary look-up. For a given set of variant generation rules, the method executes variant generation and entry retrieval simultaneously and efficiently. We have developed the seven-layered rule set (216 rules in total) according to the specification manual of UniDic-2.1.0 and other sources. An experiment shows that the spelling-variant generator with 102 rules in the first five layers is almost perfect. Another experiment shows that the form-variant generator with all 216 rules is powerful and 77.7% of multiple spellings of Katakana loanwords are unnecessary (i.e., can be removed). This result means that the proposed method can drastically reduce the number of variants that we have to register into a dictionary in advance.
%U http://www.lrec-conf.org/proceedings/lrec2012/pdf/282_Paper.pdf
%P 249-255
Markdown (Informal)
[Dictionary Look-up with Katakana Variant Recognition](http://www.lrec-conf.org/proceedings/lrec2012/pdf/282_Paper.pdf) (Sato, LREC 2012)
ACL