@inproceedings{shimizu-etal-2025-exploring,
title = "Exploring {LLM} Annotation for Adaptation of Clinical Information Extraction Models under Data-sharing Restrictions",
author = "Shimizu, Seiji and
Shohei, Hisada and
Uno, Yutaka and
Yada, Shuntaro and
Wakamiya, Shoko and
Aramaki, Eiji",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2025",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-acl.757/",
doi = "10.18653/v1/2025.findings-acl.757",
pages = "14678--14694",
ISBN = "979-8-89176-256-5",
abstract = "In-hospital text data contains valuable clinical information, yet deploying fine-tuned small language models (SLMs) for information extraction remains challenging due to differences in formatting and vocabulary across institutions. Since access to the original in-hospital data (source domain) is often restricted, annotated data from the target hospital (target domain) is crucial for domain adaptation. However, clinical annotation is notoriously expensive and time-consuming, as it demands clinical and linguistic expertise. To address this issue, we leverage large language models (LLMs) to annotate the target domain data for the adaptation. We conduct experiments on four clinical information extraction tasks, including eight target domain data. Experimental results show that LLM-annotated data consistently enhances SLM performance and, with a larger number of annotated data, outperforms manual annotation in three out of four tasks."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="shimizu-etal-2025-exploring">
<titleInfo>
<title>Exploring LLM Annotation for Adaptation of Clinical Information Extraction Models under Data-sharing Restrictions</title>
</titleInfo>
<name type="personal">
<namePart type="given">Seiji</namePart>
<namePart type="family">Shimizu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hisada</namePart>
<namePart type="family">Shohei</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yutaka</namePart>
<namePart type="family">Uno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shuntaro</namePart>
<namePart type="family">Yada</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shoko</namePart>
<namePart type="family">Wakamiya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eiji</namePart>
<namePart type="family">Aramaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2025</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Nabende</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Taher</namePart>
<namePart type="family">Pilehvar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-256-5</identifier>
</relatedItem>
<abstract>In-hospital text data contains valuable clinical information, yet deploying fine-tuned small language models (SLMs) for information extraction remains challenging due to differences in formatting and vocabulary across institutions. Since access to the original in-hospital data (source domain) is often restricted, annotated data from the target hospital (target domain) is crucial for domain adaptation. However, clinical annotation is notoriously expensive and time-consuming, as it demands clinical and linguistic expertise. To address this issue, we leverage large language models (LLMs) to annotate the target domain data for the adaptation. We conduct experiments on four clinical information extraction tasks, including eight target domain data. Experimental results show that LLM-annotated data consistently enhances SLM performance and, with a larger number of annotated data, outperforms manual annotation in three out of four tasks.</abstract>
<identifier type="citekey">shimizu-etal-2025-exploring</identifier>
<identifier type="doi">10.18653/v1/2025.findings-acl.757</identifier>
<location>
<url>https://aclanthology.org/2025.findings-acl.757/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>14678</start>
<end>14694</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Exploring LLM Annotation for Adaptation of Clinical Information Extraction Models under Data-sharing Restrictions
%A Shimizu, Seiji
%A Shohei, Hisada
%A Uno, Yutaka
%A Yada, Shuntaro
%A Wakamiya, Shoko
%A Aramaki, Eiji
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Findings of the Association for Computational Linguistics: ACL 2025
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-256-5
%F shimizu-etal-2025-exploring
%X In-hospital text data contains valuable clinical information, yet deploying fine-tuned small language models (SLMs) for information extraction remains challenging due to differences in formatting and vocabulary across institutions. Since access to the original in-hospital data (source domain) is often restricted, annotated data from the target hospital (target domain) is crucial for domain adaptation. However, clinical annotation is notoriously expensive and time-consuming, as it demands clinical and linguistic expertise. To address this issue, we leverage large language models (LLMs) to annotate the target domain data for the adaptation. We conduct experiments on four clinical information extraction tasks, including eight target domain data. Experimental results show that LLM-annotated data consistently enhances SLM performance and, with a larger number of annotated data, outperforms manual annotation in three out of four tasks.
%R 10.18653/v1/2025.findings-acl.757
%U https://aclanthology.org/2025.findings-acl.757/
%U https://doi.org/10.18653/v1/2025.findings-acl.757
%P 14678-14694
Markdown (Informal)
[Exploring LLM Annotation for Adaptation of Clinical Information Extraction Models under Data-sharing Restrictions](https://aclanthology.org/2025.findings-acl.757/) (Shimizu et al., Findings 2025)
ACL