@inproceedings{saito-etal-2017-automatically,
title = "Automatically Extracting Variant-Normalization Pairs for {J}apanese Text Normalization",
author = "Saito, Itsumi and
Nishida, Kyosuke and
Sadamitsu, Kugatsu and
Saito, Kuniko and
Tomita, Junji",
editor = "Kondrak, Greg and
Watanabe, Taro",
booktitle = "Proceedings of the Eighth International Joint Conference on Natural Language Processing (Volume 1: Long Papers)",
month = nov,
year = "2017",
address = "Taipei, Taiwan",
publisher = "Asian Federation of Natural Language Processing",
url = "https://aclanthology.org/I17-1094",
pages = "937--946",
abstract = "Social media texts, such as tweets from Twitter, contain many types of non-standard tokens, and the number of normalization approaches for handling such noisy text has been increasing. We present a method for automatically extracting pairs of a variant word and its normal form from unsegmented text on the basis of a pair-wise similarity approach. We incorporated the acquired variant-normalization pairs into Japanese morphological analysis. The experimental results show that our method can extract widely covered variants from large Twitter data and improve the recall of normalization without degrading the overall accuracy of Japanese morphological analysis.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="saito-etal-2017-automatically">
<titleInfo>
<title>Automatically Extracting Variant-Normalization Pairs for Japanese Text Normalization</title>
</titleInfo>
<name type="personal">
<namePart type="given">Itsumi</namePart>
<namePart type="family">Saito</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kyosuke</namePart>
<namePart type="family">Nishida</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kugatsu</namePart>
<namePart type="family">Sadamitsu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kuniko</namePart>
<namePart type="family">Saito</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junji</namePart>
<namePart type="family">Tomita</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Eighth International Joint Conference on Natural Language Processing (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Greg</namePart>
<namePart type="family">Kondrak</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Taro</namePart>
<namePart type="family">Watanabe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Asian Federation of Natural Language Processing</publisher>
<place>
<placeTerm type="text">Taipei, Taiwan</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Social media texts, such as tweets from Twitter, contain many types of non-standard tokens, and the number of normalization approaches for handling such noisy text has been increasing. We present a method for automatically extracting pairs of a variant word and its normal form from unsegmented text on the basis of a pair-wise similarity approach. We incorporated the acquired variant-normalization pairs into Japanese morphological analysis. The experimental results show that our method can extract widely covered variants from large Twitter data and improve the recall of normalization without degrading the overall accuracy of Japanese morphological analysis.</abstract>
<identifier type="citekey">saito-etal-2017-automatically</identifier>
<location>
<url>https://aclanthology.org/I17-1094</url>
</location>
<part>
<date>2017-11</date>
<extent unit="page">
<start>937</start>
<end>946</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Automatically Extracting Variant-Normalization Pairs for Japanese Text Normalization
%A Saito, Itsumi
%A Nishida, Kyosuke
%A Sadamitsu, Kugatsu
%A Saito, Kuniko
%A Tomita, Junji
%Y Kondrak, Greg
%Y Watanabe, Taro
%S Proceedings of the Eighth International Joint Conference on Natural Language Processing (Volume 1: Long Papers)
%D 2017
%8 November
%I Asian Federation of Natural Language Processing
%C Taipei, Taiwan
%F saito-etal-2017-automatically
%X Social media texts, such as tweets from Twitter, contain many types of non-standard tokens, and the number of normalization approaches for handling such noisy text has been increasing. We present a method for automatically extracting pairs of a variant word and its normal form from unsegmented text on the basis of a pair-wise similarity approach. We incorporated the acquired variant-normalization pairs into Japanese morphological analysis. The experimental results show that our method can extract widely covered variants from large Twitter data and improve the recall of normalization without degrading the overall accuracy of Japanese morphological analysis.
%U https://aclanthology.org/I17-1094
%P 937-946
Markdown (Informal)
[Automatically Extracting Variant-Normalization Pairs for Japanese Text Normalization](https://aclanthology.org/I17-1094) (Saito et al., IJCNLP 2017)
ACL