% BibTeX record for the *SEM 2025 conference paper identified by the key below.
% The same work is repeated further down as a MODS XML record and an EndNote record.
@inproceedings{han-etal-2025-complex,
  title     = {From Complex Word Identification to Substitution: Instruction-Tuned Language Models for Lexical Simplification},
  author    = {Han, Tonghui and
               Zhang, Xinru and
               Bi, Yaxin and
               Mulvenna, Maurice D. and
               Yang, Dongqiang},
  editor    = {Frermann, Lea and
               Stevenson, Mark},
  booktitle = {Proceedings of the 14th Joint Conference on Lexical and Computational Semantics ({*SEM} 2025)},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.starsem-1.4/},
  pages     = {48--58},
  isbn      = {979-8-89176-340-1},
  abstract  = {Lexical-level sentence simplification is essential for improving text accessibility, yet traditional methods often struggle to dynamically identify complex terms and generate contextually appropriate substitutions, resulting in limited generalization. While prompt-based approaches with large language models (LLMs) have shown strong performance and adaptability, they often lack interpretability and are prone to hallucinating. This study proposes a fine-tuning approach for mid-sized LLMs to emulate the lexical simplification pipeline. We transform complex word identification datasets into an instruction{--}response format to support instruction tuning. Experimental results show that our method substantially enhances complex word identification accuracy with reduced hallucinations while achieving competitive performance on lexical simplification benchmarks. Furthermore, we find that integrating fine-tuning with prompt engineering reduces dependency on manual prompt optimization, leading to a more efficient simplification framework.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record; its ID is the same string as the citekey identifier further down. -->
  <mods ID="han-etal-2025-complex">
    <!-- Title of the paper -->
    <titleInfo>
      <title>From Complex Word Identification to Substitution: Instruction-Tuned Language Models for Lexical Simplification</title>
    </titleInfo>

    <!-- Authors, one name element each, in listed order -->
    <name type="personal">
      <namePart type="given">Tonghui</namePart>
      <namePart type="family">Han</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Xinru</namePart>
      <namePart type="family">Zhang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yaxin</namePart>
      <namePart type="family">Bi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <!-- Two given-name parts: first name followed by middle initial -->
      <namePart type="given">Maurice</namePart>
      <namePart type="given">D</namePart>
      <namePart type="family">Mulvenna</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Dongqiang</namePart>
      <namePart type="family">Yang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>

    <!-- Issue date of the paper (year and month) -->
    <originInfo>
      <dateIssued>2025-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume that contains this paper -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 14th Joint Conference on Lexical and Computational Semantics (*SEM 2025)</title>
      </titleInfo>
      <!-- Editors of the proceedings volume -->
      <name type="personal">
        <namePart type="given">Lea</namePart>
        <namePart type="family">Frermann</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mark</namePart>
        <namePart type="family">Stevenson</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <!-- Publisher and place recorded for the proceedings volume -->
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Suzhou, China</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-340-1</identifier>
    </relatedItem>

    <!-- Abstract text, kept on one line exactly as recorded -->
    <abstract>Lexical-level sentence simplification is essential for improving text accessibility, yet traditional methods often struggle to dynamically identify complex terms and generate contextually appropriate substitutions, resulting in limited generalization. While prompt-based approaches with large language models (LLMs) have shown strong performance and adaptability, they often lack interpretability and are prone to hallucinating. This study proposes a fine-tuning approach for mid-sized LLMs to emulate the lexical simplification pipeline. We transform complex word identification datasets into an instruction–response format to support instruction tuning. Experimental results show that our method substantially enhances complex word identification accuracy with reduced hallucinations while achieving competitive performance on lexical simplification benchmarks. Furthermore, we find that integrating fine-tuning with prompt engineering reduces dependency on manual prompt optimization, leading to a more efficient simplification framework.</abstract>

    <!-- Citation key and landing-page URL -->
    <identifier type="citekey">han-etal-2025-complex</identifier>
    <location>
      <url>https://aclanthology.org/2025.starsem-1.4/</url>
    </location>

    <!-- Position within the host volume: date and page range -->
    <part>
      <date>2025-11</date>
      <extent unit="page">
        <start>48</start>
        <end>58</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T From Complex Word Identification to Substitution: Instruction-Tuned Language Models for Lexical Simplification
%A Han, Tonghui
%A Zhang, Xinru
%A Bi, Yaxin
%A Mulvenna, Maurice D.
%A Yang, Dongqiang
%Y Frermann, Lea
%Y Stevenson, Mark
%S Proceedings of the 14th Joint Conference on Lexical and Computational Semantics (*SEM 2025)
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-340-1
%F han-etal-2025-complex
%X Lexical-level sentence simplification is essential for improving text accessibility, yet traditional methods often struggle to dynamically identify complex terms and generate contextually appropriate substitutions, resulting in limited generalization. While prompt-based approaches with large language models (LLMs) have shown strong performance and adaptability, they often lack interpretability and are prone to hallucinating. This study proposes a fine-tuning approach for mid-sized LLMs to emulate the lexical simplification pipeline. We transform complex word identification datasets into an instruction–response format to support instruction tuning. Experimental results show that our method substantially enhances complex word identification accuracy with reduced hallucinations while achieving competitive performance on lexical simplification benchmarks. Furthermore, we find that integrating fine-tuning with prompt engineering reduces dependency on manual prompt optimization, leading to a more efficient simplification framework.
%U https://aclanthology.org/2025.starsem-1.4/
%P 48-58
Markdown (Informal)
[From Complex Word Identification to Substitution: Instruction-Tuned Language Models for Lexical Simplification](https://aclanthology.org/2025.starsem-1.4/) (Han et al., \*SEM 2025)
ACL