@inproceedings{wu-etal-2025-pseudo,
title = "Pseudo-label Data Construction Method and Syntax-enhanced Model for {C}hinese Semantic Error Recognition",
author = "Wu, Hongyan and
Lin, Nankai and
Jiang, Shengyi and
Wang, Lianxi and
Yang, Aimin",
editor = "Rambow, Owen and
Wanner, Leo and
Apidianaki, Marianna and
Al-Khalifa, Hend and
Eugenio, Barbara Di and
Schockaert, Steven",
booktitle = "Proceedings of the 31st International Conference on Computational Linguistics",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.coling-main.361/",
pages = "5391--5402",
abstract = "Chinese Semantic Error Recognition (CSER) has always been a weak link in Chinese language processing due to the complexity and obscureness of Chinese semantics. Existing research has gradually focused on leveraging pre-trained models to perform CSER. Although some researchers have attempted to integrate syntax information into the pre-trained language model, it requires training the models from scratch, which is time-consuming and laborious. Furthermore, despite the existence of datasets for CSER, the constrained size of these datasets impairs the performance of the models. Thus, in order to address the difficulty posed by a limited sample set and the need of annotating samples with semantic-level errors, we propose a Pseudo-label Data Construction method for CSER (PDC-CSER), generating pseudo-labels for augmented samples based on perplexity and model respectively, which overcomes the difficulty of constructing pseudo-label data containing semantic-level errors and ensures the quality of pseudo-labels. Moreover, we propose a CSER method with the Dependency Syntactic Attention mechanism (CSER-DSA) to explicitly infuse dependency syntactic information only in the fine-tuning stage, achieving robust performance, and simultaneously reducing substantial computing power and time cost. Results demonstrate that the pseudo-label technology PDC-CSER and the semantic error recognition method CSER-DSA surpass the existing models"
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wu-etal-2025-pseudo">
<titleInfo>
<title>Pseudo-label Data Construction Method and Syntax-enhanced Model for Chinese Semantic Error Recognition</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hongyan</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nankai</namePart>
<namePart type="family">Lin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shengyi</namePart>
<namePart type="family">Jiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lianxi</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aimin</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 31st International Conference on Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Owen</namePart>
<namePart type="family">Rambow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leo</namePart>
<namePart type="family">Wanner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marianna</namePart>
<namePart type="family">Apidianaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hend</namePart>
<namePart type="family">Al-Khalifa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Barbara</namePart>
<namePart type="given">Di</namePart>
<namePart type="family">Eugenio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Schockaert</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Chinese Semantic Error Recognition (CSER) has always been a weak link in Chinese language processing due to the complexity and obscureness of Chinese semantics. Existing research has gradually focused on leveraging pre-trained models to perform CSER. Although some researchers have attempted to integrate syntax information into the pre-trained language model, it requires training the models from scratch, which is time-consuming and laborious. Furthermore, despite the existence of datasets for CSER, the constrained size of these datasets impairs the performance of the models. Thus, in order to address the difficulty posed by a limited sample set and the need of annotating samples with semantic-level errors, we propose a Pseudo-label Data Construction method for CSER (PDC-CSER), generating pseudo-labels for augmented samples based on perplexity and model respectively, which overcomes the difficulty of constructing pseudo-label data containing semantic-level errors and ensures the quality of pseudo-labels. Moreover, we propose a CSER method with the Dependency Syntactic Attention mechanism (CSER-DSA) to explicitly infuse dependency syntactic information only in the fine-tuning stage, achieving robust performance, and simultaneously reducing substantial computing power and time cost. Results demonstrate that the pseudo-label technology PDC-CSER and the semantic error recognition method CSER-DSA surpass the existing models</abstract>
<identifier type="citekey">wu-etal-2025-pseudo</identifier>
<location>
<url>https://aclanthology.org/2025.coling-main.361/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>5391</start>
<end>5402</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Pseudo-label Data Construction Method and Syntax-enhanced Model for Chinese Semantic Error Recognition
%A Wu, Hongyan
%A Lin, Nankai
%A Jiang, Shengyi
%A Wang, Lianxi
%A Yang, Aimin
%Y Rambow, Owen
%Y Wanner, Leo
%Y Apidianaki, Marianna
%Y Al-Khalifa, Hend
%Y Eugenio, Barbara Di
%Y Schockaert, Steven
%S Proceedings of the 31st International Conference on Computational Linguistics
%D 2025
%8 January
%I Association for Computational Linguistics
%C Abu Dhabi, UAE
%F wu-etal-2025-pseudo
%X Chinese Semantic Error Recognition (CSER) has always been a weak link in Chinese language processing due to the complexity and obscureness of Chinese semantics. Existing research has gradually focused on leveraging pre-trained models to perform CSER. Although some researchers have attempted to integrate syntax information into the pre-trained language model, it requires training the models from scratch, which is time-consuming and laborious. Furthermore, despite the existence of datasets for CSER, the constrained size of these datasets impairs the performance of the models. Thus, in order to address the difficulty posed by a limited sample set and the need of annotating samples with semantic-level errors, we propose a Pseudo-label Data Construction method for CSER (PDC-CSER), generating pseudo-labels for augmented samples based on perplexity and model respectively, which overcomes the difficulty of constructing pseudo-label data containing semantic-level errors and ensures the quality of pseudo-labels. Moreover, we propose a CSER method with the Dependency Syntactic Attention mechanism (CSER-DSA) to explicitly infuse dependency syntactic information only in the fine-tuning stage, achieving robust performance, and simultaneously reducing substantial computing power and time cost. Results demonstrate that the pseudo-label technology PDC-CSER and the semantic error recognition method CSER-DSA surpass the existing models
%U https://aclanthology.org/2025.coling-main.361/
%P 5391-5402
Markdown (Informal)
[Pseudo-label Data Construction Method and Syntax-enhanced Model for Chinese Semantic Error Recognition](https://aclanthology.org/2025.coling-main.361/) (Wu et al., COLING 2025)
ACL