@inproceedings{kondo-etal-2025-text,
title = "Text Normalization for {J}apanese Sentiment Analysis",
author = "Kondo, Risa and
Teramen, Ayu and
Kajikawa, Reon and
Horiguchi, Koki and
Kajiwara, Tomoyuki and
Ninomiya, Takashi and
Hayashi, Hideaki and
Nakashima, Yuta and
Nagahara, Hajime",
editor = "Bak, JinYeong and
Goot, Rob van der and
Jang, Hyeju and
Buaphet, Weerayut and
Ramponi, Alan and
Xu, Wei and
Ritter, Alan",
booktitle = "Proceedings of the Tenth Workshop on Noisy and User-generated Text",
month = may,
year = "2025",
address = "Albuquerque, New Mexico, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.wnut-1.16/",
doi = "10.18653/v1/2025.wnut-1.16",
pages = "149--157",
ISBN = "979-8-89176-232-9",
abstract = "We manually normalize noisy Japanese expressions on social networking services (SNS) to improve the performance of sentiment polarity classification.Despite advances in pre-trained language models, informal expressions found in social media still plague natural language processing.In this study, we analyzed 6,000 posts from a sentiment analysis corpus for Japanese SNS text, and constructed a text normalization taxonomy consisting of 33 types of editing operations.Text normalization according to our taxonomy significantly improved the performance of BERT-based sentiment analysis in Japanese.Detailed analysis reveals that most types of editing operations each contribute to improve the performance of sentiment analysis."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kondo-etal-2025-text">
<titleInfo>
<title>Text Normalization for Japanese Sentiment Analysis</title>
</titleInfo>
<name type="personal">
<namePart type="given">Risa</namePart>
<namePart type="family">Kondo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ayu</namePart>
<namePart type="family">Teramen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Reon</namePart>
<namePart type="family">Kajikawa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Koki</namePart>
<namePart type="family">Horiguchi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tomoyuki</namePart>
<namePart type="family">Kajiwara</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Takashi</namePart>
<namePart type="family">Ninomiya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hideaki</namePart>
<namePart type="family">Hayashi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuta</namePart>
<namePart type="family">Nakashima</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hajime</namePart>
<namePart type="family">Nagahara</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Tenth Workshop on Noisy and User-generated Text</title>
</titleInfo>
<name type="personal">
<namePart type="given">JinYeong</namePart>
<namePart type="family">Bak</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rob</namePart>
<namePart type="given">van</namePart>
<namePart type="given">der</namePart>
<namePart type="family">Goot</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hyeju</namePart>
<namePart type="family">Jang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Weerayut</namePart>
<namePart type="family">Buaphet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alan</namePart>
<namePart type="family">Ramponi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wei</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alan</namePart>
<namePart type="family">Ritter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Albuquerque, New Mexico, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-232-9</identifier>
</relatedItem>
<abstract>We manually normalize noisy Japanese expressions on social networking services (SNS) to improve the performance of sentiment polarity classification.Despite advances in pre-trained language models, informal expressions found in social media still plague natural language processing.In this study, we analyzed 6,000 posts from a sentiment analysis corpus for Japanese SNS text, and constructed a text normalization taxonomy consisting of 33 types of editing operations.Text normalization according to our taxonomy significantly improved the performance of BERT-based sentiment analysis in Japanese.Detailed analysis reveals that most types of editing operations each contribute to improve the performance of sentiment analysis.</abstract>
<identifier type="citekey">kondo-etal-2025-text</identifier>
<identifier type="doi">10.18653/v1/2025.wnut-1.16</identifier>
<location>
<url>https://aclanthology.org/2025.wnut-1.16/</url>
</location>
<part>
<date>2025-05</date>
<extent unit="page">
<start>149</start>
<end>157</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Text Normalization for Japanese Sentiment Analysis
%A Kondo, Risa
%A Teramen, Ayu
%A Kajikawa, Reon
%A Horiguchi, Koki
%A Kajiwara, Tomoyuki
%A Ninomiya, Takashi
%A Hayashi, Hideaki
%A Nakashima, Yuta
%A Nagahara, Hajime
%Y Bak, JinYeong
%Y Goot, Rob van der
%Y Jang, Hyeju
%Y Buaphet, Weerayut
%Y Ramponi, Alan
%Y Xu, Wei
%Y Ritter, Alan
%S Proceedings of the Tenth Workshop on Noisy and User-generated Text
%D 2025
%8 May
%I Association for Computational Linguistics
%C Albuquerque, New Mexico, USA
%@ 979-8-89176-232-9
%F kondo-etal-2025-text
%X We manually normalize noisy Japanese expressions on social networking services (SNS) to improve the performance of sentiment polarity classification.Despite advances in pre-trained language models, informal expressions found in social media still plague natural language processing.In this study, we analyzed 6,000 posts from a sentiment analysis corpus for Japanese SNS text, and constructed a text normalization taxonomy consisting of 33 types of editing operations.Text normalization according to our taxonomy significantly improved the performance of BERT-based sentiment analysis in Japanese.Detailed analysis reveals that most types of editing operations each contribute to improve the performance of sentiment analysis.
%R 10.18653/v1/2025.wnut-1.16
%U https://aclanthology.org/2025.wnut-1.16/
%U https://doi.org/10.18653/v1/2025.wnut-1.16
%P 149-157
Markdown (Informal)
[Text Normalization for Japanese Sentiment Analysis](https://aclanthology.org/2025.wnut-1.16/) (Kondo et al., WNUT 2025)
ACL
- Risa Kondo, Ayu Teramen, Reon Kajikawa, Koki Horiguchi, Tomoyuki Kajiwara, Takashi Ninomiya, Hideaki Hayashi, Yuta Nakashima, and Hajime Nagahara. 2025. Text Normalization for Japanese Sentiment Analysis. In Proceedings of the Tenth Workshop on Noisy and User-generated Text, pages 149–157, Albuquerque, New Mexico, USA. Association for Computational Linguistics.