@inproceedings{xu-etal-2025-llms-weakness,
title = "{LLM}{'}s Weakness in {NER} Doesn{'}t Stop It from Enhancing a Stronger {SLM}",
author = "Xu, Weilu and
Dang, Renfei and
Huang, Shujian",
editor = "Anderson, Adam and
Gordin, Shai and
Li, Bin and
Liu, Yudong and
Passarotti, Marco C. and
Sprugnoli, Rachele",
booktitle = "Proceedings of the Second Workshop on Ancient Language Processing",
month = may,
year = "2025",
address = "The Albuquerque Convention Center, Laguna",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.alp-1.21/",
doi = "10.18653/v1/2025.alp-1.21",
pages = "170--175",
ISBN = "979-8-89176-235-0",
abstract = "Large Language Models (LLMs) demonstrate strong semantic understanding ability and extensive knowledge, but struggle with Named Entity Recognition (NER) due to hallucination and high training costs. Meanwhile, supervised Small Language Models (SLMs) efficiently provide structured predictions but lack adaptability to unseen entities and complex contexts. In this study, we investigate how a relatively weaker LLM can effectively support a supervised model in NER tasks. We first improve the LLM using LoRA-based fine-tuning and similarity-based prompting, achieving performance comparable to a SLM baseline. To further improve results, we propose a fusion strategy that integrates both models: prioritising SLM{'}s predictions while using LLM guidance in low confidence cases. Our hybrid approach outperforms both baselines on three classic Chinese NER datasets."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="xu-etal-2025-llms-weakness">
<titleInfo>
<title>LLM’s Weakness in NER Doesn’t Stop It from Enhancing a Stronger SLM</title>
</titleInfo>
<name type="personal">
<namePart type="given">Weilu</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Renfei</namePart>
<namePart type="family">Dang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shujian</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Second Workshop on Ancient Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Adam</namePart>
<namePart type="family">Anderson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shai</namePart>
<namePart type="family">Gordin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bin</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yudong</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marco</namePart>
<namePart type="given">C</namePart>
<namePart type="family">Passarotti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rachele</namePart>
<namePart type="family">Sprugnoli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">The Albuquerque Convention Center, Laguna</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-235-0</identifier>
</relatedItem>
<abstract>Large Language Models (LLMs) demonstrate strong semantic understanding ability and extensive knowledge, but struggle with Named Entity Recognition (NER) due to hallucination and high training costs. Meanwhile, supervised Small Language Models (SLMs) efficiently provide structured predictions but lack adaptability to unseen entities and complex contexts. In this study, we investigate how a relatively weaker LLM can effectively support a supervised model in NER tasks. We first improve the LLM using LoRA-based fine-tuning and similarity-based prompting, achieving performance comparable to a SLM baseline. To further improve results, we propose a fusion strategy that integrates both models: prioritising SLM’s predictions while using LLM guidance in low confidence cases. Our hybrid approach outperforms both baselines on three classic Chinese NER datasets.</abstract>
<identifier type="citekey">xu-etal-2025-llms-weakness</identifier>
<identifier type="doi">10.18653/v1/2025.alp-1.21</identifier>
<location>
<url>https://aclanthology.org/2025.alp-1.21/</url>
</location>
<part>
<date>2025-05</date>
<extent unit="page">
<start>170</start>
<end>175</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T LLM’s Weakness in NER Doesn’t Stop It from Enhancing a Stronger SLM
%A Xu, Weilu
%A Dang, Renfei
%A Huang, Shujian
%Y Anderson, Adam
%Y Gordin, Shai
%Y Li, Bin
%Y Liu, Yudong
%Y Passarotti, Marco C.
%Y Sprugnoli, Rachele
%S Proceedings of the Second Workshop on Ancient Language Processing
%D 2025
%8 May
%I Association for Computational Linguistics
%C The Albuquerque Convention Center, Laguna
%@ 979-8-89176-235-0
%F xu-etal-2025-llms-weakness
%X Large Language Models (LLMs) demonstrate strong semantic understanding ability and extensive knowledge, but struggle with Named Entity Recognition (NER) due to hallucination and high training costs. Meanwhile, supervised Small Language Models (SLMs) efficiently provide structured predictions but lack adaptability to unseen entities and complex contexts. In this study, we investigate how a relatively weaker LLM can effectively support a supervised model in NER tasks. We first improve the LLM using LoRA-based fine-tuning and similarity-based prompting, achieving performance comparable to a SLM baseline. To further improve results, we propose a fusion strategy that integrates both models: prioritising SLM’s predictions while using LLM guidance in low confidence cases. Our hybrid approach outperforms both baselines on three classic Chinese NER datasets.
%R 10.18653/v1/2025.alp-1.21
%U https://aclanthology.org/2025.alp-1.21/
%U https://doi.org/10.18653/v1/2025.alp-1.21
%P 170-175
Markdown (Informal)
[LLM’s Weakness in NER Doesn’t Stop It from Enhancing a Stronger SLM](https://aclanthology.org/2025.alp-1.21/) (Xu et al., ALP 2025)
ACL