@inproceedings{zheng-moeller-2025-challenges,
title = "Challenges in Processing {C}hinese Texts Across Genres and Eras",
author = "Zheng, Minghao and
Moeller, Sarah",
editor = "Zhang, Chen and
Allaway, Emily and
Shen, Hua and
Miculicich, Lesly and
Li, Yinqiao and
M'hamdi, Meryem and
Limkonchotiwat, Peerat and
Bai, Richard He and
T.y.s.s., Santosh and
Han, Sophia Simeng and
Thapa, Surendrabikram and
Rim, Wiem Ben",
booktitle = "Proceedings of the 9th Widening NLP Workshop",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.winlp-main.34/",
pages = "230--234",
ISBN = "979-8-89176-351-7",
abstract = "Pre-trained Chinese Natural Language Processing (NLP) tools show reduced performance when analyzing poetry compared to prose. This study investigates the discrepancies between tools trained on either Classical or Modern Chinese prose when handling Classical Chinese prose and Classical Chinese poetry. Three experiments reveal error patterns that indicate the weaker performance on Classical Chinese poems is due to challenges identifying word boundaries. Specifically, tools trained on Classical prose struggle recognizing word boundaries within Classical poetic structures and tools trained on Modern prose have difficulty with word segmentation in both Classical Chinese genres. These findings provide valuable insights into the limitations of current NLP tools for studying Classical Chinese literature."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zheng-moeller-2025-challenges">
<titleInfo>
<title>Challenges in Processing Chinese Texts Across Genres and Eras</title>
</titleInfo>
<name type="personal">
<namePart type="given">Minghao</namePart>
<namePart type="family">Zheng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sarah</namePart>
<namePart type="family">Moeller</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 9th Widening NLP Workshop</title>
</titleInfo>
<name type="personal">
<namePart type="given">Chen</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Emily</namePart>
<namePart type="family">Allaway</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hua</namePart>
<namePart type="family">Shen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lesly</namePart>
<namePart type="family">Miculicich</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yinqiao</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Meryem</namePart>
<namePart type="family">M’hamdi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Peerat</namePart>
<namePart type="family">Limkonchotiwat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Richard</namePart>
<namePart type="given">He</namePart>
<namePart type="family">Bai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Santosh</namePart>
<namePart type="family">T.y.s.s.</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sophia</namePart>
<namePart type="given">Simeng</namePart>
<namePart type="family">Han</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Surendrabikram</namePart>
<namePart type="family">Thapa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wiem</namePart>
<namePart type="given">Ben</namePart>
<namePart type="family">Rim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-351-7</identifier>
</relatedItem>
<abstract>Pre-trained Chinese Natural Language Processing (NLP) tools show reduced performance when analyzing poetry compared to prose. This study investigates the discrepancies between tools trained on either Classical or Modern Chinese prose when handling Classical Chinese prose and Classical Chinese poetry. Three experiments reveal error patterns that indicate the weaker performance on Classical Chinese poems is due to challenges identifying word boundaries. Specifically, tools trained on Classical prose struggle recognizing word boundaries within Classical poetic structures and tools trained on Modern prose have difficulty with word segmentation in both Classical Chinese genres. These findings provide valuable insights into the limitations of current NLP tools for studying Classical Chinese literature.</abstract>
<identifier type="citekey">zheng-moeller-2025-challenges</identifier>
<location>
<url>https://aclanthology.org/2025.winlp-main.34/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>230</start>
<end>234</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Challenges in Processing Chinese Texts Across Genres and Eras
%A Zheng, Minghao
%A Moeller, Sarah
%Y Zhang, Chen
%Y Allaway, Emily
%Y Shen, Hua
%Y Miculicich, Lesly
%Y Li, Yinqiao
%Y M’hamdi, Meryem
%Y Limkonchotiwat, Peerat
%Y Bai, Richard He
%Y T.y.s.s., Santosh
%Y Han, Sophia Simeng
%Y Thapa, Surendrabikram
%Y Rim, Wiem Ben
%S Proceedings of the 9th Widening NLP Workshop
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-351-7
%F zheng-moeller-2025-challenges
%X Pre-trained Chinese Natural Language Processing (NLP) tools show reduced performance when analyzing poetry compared to prose. This study investigates the discrepancies between tools trained on either Classical or Modern Chinese prose when handling Classical Chinese prose and Classical Chinese poetry. Three experiments reveal error patterns that indicate the weaker performance on Classical Chinese poems is due to challenges identifying word boundaries. Specifically, tools trained on Classical prose struggle recognizing word boundaries within Classical poetic structures and tools trained on Modern prose have difficulty with word segmentation in both Classical Chinese genres. These findings provide valuable insights into the limitations of current NLP tools for studying Classical Chinese literature.
%U https://aclanthology.org/2025.winlp-main.34/
%P 230-234
Markdown (Informal)
[Challenges in Processing Chinese Texts Across Genres and Eras](https://aclanthology.org/2025.winlp-main.34/) (Zheng & Moeller, WiNLP 2025)
ACL