% Conference paper from the ROCLING 2022 proceedings (pages 193--199).
% Case-sensitive words in the title and booktitle are wrapped in braces as
% whole words so that styles which sentence-case titles keep their capitals.
% The abstract is reproduced verbatim from the published record.
@inproceedings{yeh-etal-2022-multifaceted,
  title     = {Multifaceted Assessments of Traditional {Chinese} Word Segmentation Tool on Large Corpora},
  author    = {Yeh, Wen-Chao and
               Hsieh, Yu-Lun and
               Chang, Yung-Chun and
               Hsu, Wen-Lian},
  editor    = {Chang, Yung-Chun and
               Huang, Yi-Chin},
  booktitle = {Proceedings of the 34th Conference on Computational Linguistics and Speech Processing ({ROCLING} 2022)},
  month     = nov,
  year      = {2022},
  address   = {Taipei, Taiwan},
  publisher = {The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)},
  url       = {https://aclanthology.org/2022.rocling-1.24/},
  pages     = {193--199},
  language  = {zho},
  abstract  = {This study aims to evaluate three most popular word segmentation tool for a large Traditional Chinese corpus in terms of their efficiency, resource consumption, and cost. Specifically, we compare the performances of Jieba, CKIP, and MONPA on word segmentation, part-of-speech tagging and named entity recognition through extensive experiments. Experimental results show that MONPA using GPU for batch segmentation can greatly reduce the processing time of massive datasets. In addition, its features such as word segmentation, part-of-speech tagging, and named entity recognition are beneficial to downstream applications.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS record for the conference paper with citekey yeh-etal-2022-multifaceted. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="yeh-etal-2022-multifaceted">
    <titleInfo>
      <title>Multifaceted Assessments of Traditional Chinese Word Segmentation Tool on Large Corpora</title>
    </titleInfo>
    <!-- Authors of the paper, in byline order. -->
    <name type="personal">
      <namePart type="given">Wen-Chao</namePart>
      <namePart type="family">Yeh</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yu-Lun</namePart>
      <namePart type="family">Hsieh</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yung-Chun</namePart>
      <namePart type="family">Chang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Wen-Lian</namePart>
      <namePart type="family">Hsu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2022-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <language>
      <!-- "zho" is a language code, not a language name, so it is marked as a code with its authority. -->
      <languageTerm type="code" authority="iso639-3">zho</languageTerm>
    </language>
    <!-- Host item: the proceedings volume that contains the paper, with its editors and publisher. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 34th Conference on Computational Linguistics and Speech Processing (ROCLING 2022)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Yung-Chun</namePart>
        <namePart type="family">Chang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yi-Chin</namePart>
        <namePart type="family">Huang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)</publisher>
        <place>
          <placeTerm type="text">Taipei, Taiwan</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>This study aims to evaluate three most popular word segmentation tool for a large Traditional Chinese corpus in terms of their efficiency, resource consumption, and cost. Specifically, we compare the performances of Jieba, CKIP, and MONPA on word segmentation, part-of-speech tagging and named entity recognition through extensive experiments. Experimental results show that MONPA using GPU for batch segmentation can greatly reduce the processing time of massive datasets. In addition, its features such as word segmentation, part-of-speech tagging, and named entity recognition are beneficial to downstream applications.</abstract>
    <identifier type="citekey">yeh-etal-2022-multifaceted</identifier>
    <location>
      <url>https://aclanthology.org/2022.rocling-1.24/</url>
    </location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2022-11</date>
      <extent unit="page">
        <start>193</start>
        <end>199</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Multifaceted Assessments of Traditional Chinese Word Segmentation Tool on Large Corpora
%A Yeh, Wen-Chao
%A Hsieh, Yu-Lun
%A Chang, Yung-Chun
%A Hsu, Wen-Lian
%Y Chang, Yung-Chun
%Y Huang, Yi-Chin
%S Proceedings of the 34th Conference on Computational Linguistics and Speech Processing (ROCLING 2022)
%D 2022
%8 November
%I The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)
%C Taipei, Taiwan
%G zho
%F yeh-etal-2022-multifaceted
%X This study aims to evaluate three most popular word segmentation tool for a large Traditional Chinese corpus in terms of their efficiency, resource consumption, and cost. Specifically, we compare the performances of Jieba, CKIP, and MONPA on word segmentation, part-of-speech tagging and named entity recognition through extensive experiments. Experimental results show that MONPA using GPU for batch segmentation can greatly reduce the processing time of massive datasets. In addition, its features such as word segmentation, part-of-speech tagging, and named entity recognition are beneficial to downstream applications.
%U https://aclanthology.org/2022.rocling-1.24/
%P 193-199
Markdown (Informal)
[Multifaceted Assessments of Traditional Chinese Word Segmentation Tool on Large Corpora](https://aclanthology.org/2022.rocling-1.24/) (Yeh et al., ROCLING 2022)
ACL