@inproceedings{bolucu-etal-2025-bridging,
title = "Bridging the Gap: Instruction-Tuned {LLM}s for Scientific Named Entity Recognition",
author = {B{\"o}l{\"u}c{\"u}, Necva and
Rybinski, Maciej and
Wan, Stephen},
editor = "Accomazzi, Alberto and
Ghosal, Tirthankar and
Grezes, Felix and
Lockhart, Kelly",
booktitle = "Proceedings of the Third Workshop for Artificial Intelligence for Scientific Publications",
month = dec,
year = "2025",
address = "Mumbai, India and virtual",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.wasp-main.7/",
pages = "56--71",
ISBN = "979-8-89176-310-4",
abstract = "Information extraction (IE) from scientific literature plays an important role in many information-seeking pipelines. Large Language Models (LLMs) have demonstrated strong zero-shot and few-shot performance on IE tasks. However, there are challenges in practical deployment, especially in scenarios that involve sensitive information, such as industrial research or limited budgets. A key question is whether there is a need for a fine-tuned model for optimal domain adaptation (i.e., whether in-domain labelled training data is needed, or zero-shot to few-shot effectiveness is enough). In this paper, we explore this question in the context of IE on scientific literature. We further consider methodological questions, such as alternatives to cloud-based proprietary LLMs (e.g., GPT and Claude) when these are unsuitable due to data privacy, data sensitivity, or cost reasons. This paper outlines empirical results to recommend which locally hosted open-source LLM approach to adopt and illustrates the trade-offs in domain adaptation."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bolucu-etal-2025-bridging">
<titleInfo>
<title>Bridging the Gap: Instruction-Tuned LLMs for Scientific Named Entity Recognition</title>
</titleInfo>
<name type="personal">
<namePart type="given">Necva</namePart>
<namePart type="family">Bölücü</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maciej</namePart>
<namePart type="family">Rybinski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stephen</namePart>
<namePart type="family">Wan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Third Workshop for Artificial Intelligence for Scientific Publications</title>
</titleInfo>
<name type="personal">
<namePart type="given">Alberto</namePart>
<namePart type="family">Accomazzi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tirthankar</namePart>
<namePart type="family">Ghosal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Felix</namePart>
<namePart type="family">Grezes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kelly</namePart>
<namePart type="family">Lockhart</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mumbai, India and virtual</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-310-4</identifier>
</relatedItem>
<abstract>Information extraction (IE) from scientific literature plays an important role in many information-seeking pipelines. Large Language Models (LLMs) have demonstrated strong zero-shot and few-shot performance on IE tasks. However, there are challenges in practical deployment, especially in scenarios that involve sensitive information, such as industrial research or limited budgets. A key question is whether there is a need for a fine-tuned model for optimal domain adaptation (i.e., whether in-domain labelled training data is needed, or zero-shot to few-shot effectiveness is enough). In this paper, we explore this question in the context of IE on scientific literature. We further consider methodological questions, such as alternatives to cloud-based proprietary LLMs (e.g., GPT and Claude) when these are unsuitable due to data privacy, data sensitivity, or cost reasons. This paper outlines empirical results to recommend which locally hosted open-source LLM approach to adopt and illustrates the trade-offs in domain adaptation.</abstract>
<identifier type="citekey">bolucu-etal-2025-bridging</identifier>
<location>
<url>https://aclanthology.org/2025.wasp-main.7/</url>
</location>
<part>
<date>2025-12</date>
<extent unit="page">
<start>56</start>
<end>71</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Bridging the Gap: Instruction-Tuned LLMs for Scientific Named Entity Recognition
%A Bölücü, Necva
%A Rybinski, Maciej
%A Wan, Stephen
%Y Accomazzi, Alberto
%Y Ghosal, Tirthankar
%Y Grezes, Felix
%Y Lockhart, Kelly
%S Proceedings of the Third Workshop for Artificial Intelligence for Scientific Publications
%D 2025
%8 December
%I Association for Computational Linguistics
%C Mumbai, India and virtual
%@ 979-8-89176-310-4
%F bolucu-etal-2025-bridging
%X Information extraction (IE) from scientific literature plays an important role in many information-seeking pipelines. Large Language Models (LLMs) have demonstrated strong zero-shot and few-shot performance on IE tasks. However, there are challenges in practical deployment, especially in scenarios that involve sensitive information, such as industrial research or limited budgets. A key question is whether there is a need for a fine-tuned model for optimal domain adaptation (i.e., whether in-domain labelled training data is needed, or zero-shot to few-shot effectiveness is enough). In this paper, we explore this question in the context of IE on scientific literature. We further consider methodological questions, such as alternatives to cloud-based proprietary LLMs (e.g., GPT and Claude) when these are unsuitable due to data privacy, data sensitivity, or cost reasons. This paper outlines empirical results to recommend which locally hosted open-source LLM approach to adopt and illustrates the trade-offs in domain adaptation.
%U https://aclanthology.org/2025.wasp-main.7/
%P 56-71
Markdown (Informal)
[Bridging the Gap: Instruction-Tuned LLMs for Scientific Named Entity Recognition](https://aclanthology.org/2025.wasp-main.7/) (Bölücü et al., WASP 2025)
ACL
Necva Bölücü, Maciej Rybinski, and Stephen Wan. 2025. Bridging the Gap: Instruction-Tuned LLMs for Scientific Named Entity Recognition. In Proceedings of the Third Workshop for Artificial Intelligence for Scientific Publications, pages 56–71, Mumbai, India and virtual. Association for Computational Linguistics.