% Workshop paper record (C3NLP 2026, ACL Anthology).
% Pinyin tone marks are written as LaTeX accent escapes so the entry stays
% processable by classic 8-bit BibTeX; the Chinese characters in the abstract
% have no accent-escape form and remain UTF-8 (a CJK-capable setup is needed
% only if the abstract itself is typeset).
@inproceedings{chen-2026-ontology,
    title     = {Ontology-oriented lexico-semantic modeling and neural classification of {Chinese} ch{\'e}ngy{\v{u}}: A culture-aware {NLP} approach},
    author    = {Chen, Lian},
    editor    = {Prabhakaran, Vinodkumar and
                 Dev, Sunipa and
                 Benotti, Luciana and
                 Hershcovich, Daniel and
                 Cao, Yong and
                 Zhou, Li and
                 Ma, Bolei and
                 Adebara, Ife},
    booktitle = {Proceedings of the 4th Workshop on Cross-Cultural Considerations in {NLP} ({C3NLP} 2026)},
    month     = jul,
    year      = {2026},
    address   = {San Diego, California, United States},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2026.c3nlp-1.12/},
    pages     = {150--160},
    isbn      = {979-8-89176-420-0},
    abstract  = {This paper proposes a semi-automatic lexico-semantic modeling framework for Chinese ch{\'e}ngy{\v{u}} containing body-part and animal lexemes. The framework combines manual semantic annotation, lightweight RDF/OWL formalization and semantic classification in order to investigate whether lexical mediators such as 心 x{\={i}}n ``heart/mind'', 口 k{\v{o}}u ``mouth'' or 马 m{\v{a}} ``horse'' are sufficient to predict idiomatic semantic interpretation. Based on 440 annotated ch{\'e}ngy{\v{u}} normalized into 18 semantic categories, we compare three classification approaches: a rule-based keyword baseline, character n-gram TF-IDF with logistic regression, and BERT-base-chinese. The results show that lexical mediators cannot be directly equated with semantic categories and that TF-IDF achieves the best overall performance, suggesting that lightweight character-level representations remain robust for very short idioms in low-resource settings. The study contributes an interpretable RDF/OWL-compatible resource for culture-aware modeling of Chinese idioms.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="chen-2026-ontology">
<titleInfo>
<title>Ontology-oriented lexico-semantic modeling and neural classification of Chinese chéngyǔ: A culture-aware NLP approach</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lian</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 4th Workshop on Cross-Cultural Considerations in NLP (C3NLP 2026)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Vinodkumar</namePart>
<namePart type="family">Prabhakaran</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sunipa</namePart>
<namePart type="family">Dev</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luciana</namePart>
<namePart type="family">Benotti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Hershcovich</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yong</namePart>
<namePart type="family">Cao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Li</namePart>
<namePart type="family">Zhou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bolei</namePart>
<namePart type="family">Ma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ife</namePart>
<namePart type="family">Adebara</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-420-0</identifier>
</relatedItem>
<abstract>This paper proposes a semi-automatic lexico-semantic modeling framework for Chinese chéngyǔ containing body-part and animal lexemes. The framework combines manual semantic annotation, lightweight RDF/OWL formalization and semantic classification in order to investigate whether lexical mediators such as 心 xīn “heart/mind”, 口 kǒu “mouth” or 马 mǎ “horse” are sufficient to predict idiomatic semantic interpretation. Based on 440 annotated chéngyǔ normalized into 18 semantic categories, we compare three classification approaches: a rule-based keyword baseline, character n-gram TF-IDF with logistic regression, and BERT-base-chinese. The results show that lexical mediators cannot be directly equated with semantic categories and that TF-IDF achieves the best overall performance, suggesting that lightweight character-level representations remain robust for very short idioms in low-resource settings. The study contributes an interpretable RDF/OWL-compatible resource for culture-aware modeling of Chinese idioms.</abstract>
<identifier type="citekey">chen-2026-ontology</identifier>
<location>
<url>https://aclanthology.org/2026.c3nlp-1.12/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>150</start>
<end>160</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Ontology-oriented lexico-semantic modeling and neural classification of Chinese chéngyǔ: A culture-aware NLP approach
%A Chen, Lian
%Y Prabhakaran, Vinodkumar
%Y Dev, Sunipa
%Y Benotti, Luciana
%Y Hershcovich, Daniel
%Y Cao, Yong
%Y Zhou, Li
%Y Ma, Bolei
%Y Adebara, Ife
%S Proceedings of the 4th Workshop on Cross-Cultural Considerations in NLP (C3NLP 2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-420-0
%F chen-2026-ontology
%X This paper proposes a semi-automatic lexico-semantic modeling framework for Chinese chéngyǔ containing body-part and animal lexemes. The framework combines manual semantic annotation, lightweight RDF/OWL formalization and semantic classification in order to investigate whether lexical mediators such as 心 xīn “heart/mind”, 口 kǒu “mouth” or 马 mǎ “horse” are sufficient to predict idiomatic semantic interpretation. Based on 440 annotated chéngyǔ normalized into 18 semantic categories, we compare three classification approaches: a rule-based keyword baseline, character n-gram TF-IDF with logistic regression, and BERT-base-chinese. The results show that lexical mediators cannot be directly equated with semantic categories and that TF-IDF achieves the best overall performance, suggesting that lightweight character-level representations remain robust for very short idioms in low-resource settings. The study contributes an interpretable RDF/OWL-compatible resource for culture-aware modeling of Chinese idioms.
%U https://aclanthology.org/2026.c3nlp-1.12/
%P 150-160
Markdown (Informal)
[Ontology-oriented lexico-semantic modeling and neural classification of Chinese chéngyǔ: A culture-aware NLP approach](https://aclanthology.org/2026.c3nlp-1.12/) (Chen, C3NLP 2026)
ACL