% LREC-COLING 2024 paper (ACL Anthology ID 2024.lrec-main.1535).
% Title acronyms are brace-protected as whole words so styles that apply sentence casing keep "WkNER" and "kNN" intact.
@inproceedings{li-etal-2024-wkner-enhancing,
title = "{WkNER}: Enhancing Named Entity Recognition with Word Segmentation Constraints and {kNN} Retrieval",
author = "Li, Yanchun and
Deng, Senlin and
Shen, Dongsu and
Tian, Shujuan and
Long, Saiqin",
editor = "Calzolari, Nicoletta and
Kan, Min-Yen and
Hoste, Veronique and
Lenci, Alessandro and
Sakti, Sakriani and
Xue, Nianwen",
booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.lrec-main.1535",
pages = "17651--17663",
abstract = "Fine-tuning Pre-trained Language Models (PLMs) is a popular Natural Language Processing (NLP) paradigm for addressing Named Entity Recognition (NER) tasks. However, neural network models often demonstrate poor generalization capabilities due to significant disparities between the knowledge learned by PLMs and the distribution of the target dataset, as well as data scarcity issues. In addition, token omission in predictions due to insufficient learning remains a challenge in NER. In this paper, we propose a kNN retrieval enhancement algorithm (WkNER) that incorporates word segmentation information to enhance the model{'}s generalization ability and alleviate the problem of missing entity tokens in prediction. The introduction of word segmentation information is used to preliminarily determine the boundaries of entities and alleviate the common prediction errors of missing tokens within entities made by the fine-tuned model. Secondly, we find that non-entities in the retrieval table contain a large amount of redundant information, and explore the effects of introducing non-entity information of different scales on the model. Experimental results show that our proposed method significantly improves the performance of baseline models, and achieves better or compared recognition accuracy than previous state-of-the-art models in multiple public Chinese and English datasets. Especially in low-resource scenarios, our method achieves higher accuracy on 20{\%} of the dataset than the original method on the full dataset.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="li-etal-2024-wkner-enhancing">
<titleInfo>
<title>WkNER: Enhancing Named Entity Recognition with Word Segmentation Constraints and kNN Retrieval</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yanchun</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Senlin</namePart>
<namePart type="family">Deng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dongsu</namePart>
<namePart type="family">Shen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shujuan</namePart>
<namePart type="family">Tian</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Saiqin</namePart>
<namePart type="family">Long</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronique</namePart>
<namePart type="family">Hoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Fine-tuning Pre-trained Language Models (PLMs) is a popular Natural Language Processing (NLP) paradigm for addressing Named Entity Recognition (NER) tasks. However, neural network models often demonstrate poor generalization capabilities due to significant disparities between the knowledge learned by PLMs and the distribution of the target dataset, as well as data scarcity issues. In addition, token omission in predictions due to insufficient learning remains a challenge in NER. In this paper, we propose a kNN retrieval enhancement algorithm (WkNER) that incorporates word segmentation information to enhance the model’s generalization ability and alleviate the problem of missing entity tokens in prediction. The introduction of word segmentation information is used to preliminarily determine the boundaries of entities and alleviate the common prediction errors of missing tokens within entities made by the fine-tuned model. Secondly, we find that non-entities in the retrieval table contain a large amount of redundant information, and explore the effects of introducing non-entity information of different scales on the model. Experimental results show that our proposed method significantly improves the performance of baseline models, and achieves better or compared recognition accuracy than previous state-of-the-art models in multiple public Chinese and English datasets. Especially in low-resource scenarios, our method achieves higher accuracy on 20% of the dataset than the original method on the full dataset.</abstract>
<identifier type="citekey">li-etal-2024-wkner-enhancing</identifier>
<location>
<url>https://aclanthology.org/2024.lrec-main.1535</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>17651</start>
<end>17663</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T WkNER: Enhancing Named Entity Recognition with Word Segmentation Constraints and kNN Retrieval
%A Li, Yanchun
%A Deng, Senlin
%A Shen, Dongsu
%A Tian, Shujuan
%A Long, Saiqin
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F li-etal-2024-wkner-enhancing
%X Fine-tuning Pre-trained Language Models (PLMs) is a popular Natural Language Processing (NLP) paradigm for addressing Named Entity Recognition (NER) tasks. However, neural network models often demonstrate poor generalization capabilities due to significant disparities between the knowledge learned by PLMs and the distribution of the target dataset, as well as data scarcity issues. In addition, token omission in predictions due to insufficient learning remains a challenge in NER. In this paper, we propose a kNN retrieval enhancement algorithm (WkNER) that incorporates word segmentation information to enhance the model’s generalization ability and alleviate the problem of missing entity tokens in prediction. The introduction of word segmentation information is used to preliminarily determine the boundaries of entities and alleviate the common prediction errors of missing tokens within entities made by the fine-tuned model. Secondly, we find that non-entities in the retrieval table contain a large amount of redundant information, and explore the effects of introducing non-entity information of different scales on the model. Experimental results show that our proposed method significantly improves the performance of baseline models, and achieves better or compared recognition accuracy than previous state-of-the-art models in multiple public Chinese and English datasets. Especially in low-resource scenarios, our method achieves higher accuracy on 20% of the dataset than the original method on the full dataset.
%U https://aclanthology.org/2024.lrec-main.1535
%P 17651-17663
Markdown (Informal)
[WkNER: Enhancing Named Entity Recognition with Word Segmentation Constraints and kNN Retrieval](https://aclanthology.org/2024.lrec-main.1535) (Li et al., LREC-COLING 2024)
ACL