@inproceedings{wu-etal-2025-active,
title = "Active Domain Knowledge Acquisition with 100-Dollar Budget: Enhancing {LLM}s via Cost-Efficient, Expert-Involved Interaction in Sensitive Domains",
author = "Wu, Yang and
Moraffah, Raha and
Yao, Rujing and
Yu, Jinhong and
Tao, Zhimin and
Liu, Xiaozhong",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2025",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-emnlp.713/",
pages = "13244--13257",
ISBN = "979-8-89176-335-7",
abstract = "Large Language Models (LLMs) have demonstrated an impressive level of general knowledge. However, they often struggle in highly specialized and sensitive domains such as drug discovery and rare disease research due to the lack of expert knowledge, which is often costly to obtain. In this paper, we propose a novel framework (PU-ADKA) designed to efficiently enhance domain-specific LLMs by actively engaging domain experts within a fixed budget. Unlike traditional fine-tuning approaches, PU-ADKA proactively identifies and queries the most appropriate expert from a team, taking into account each expert{'}s availability, competency, knowledge boundaries, and consultation cost. We train PU-ADKA using simulations on PubMed publication data and validate it through domain expert interactions, showing promising improvements in LLM domain knowledge acquisition. Furthermore, our experiments with a real-world drug development team validate that PU-ADKA can significantly enhance LLM performance in specialized domains while adhering to strict budget constraints. In addition to outlining our methodological innovations and experimental results, we release a new benchmark dataset, CKAD, for cost-effective LLM domain knowledge acquisition to foster further research in this challenging area."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wu-etal-2025-active">
<titleInfo>
<title>Active Domain Knowledge Acquisition with 100-Dollar Budget: Enhancing LLMs via Cost-Efficient, Expert-Involved Interaction in Sensitive Domains</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Raha</namePart>
<namePart type="family">Moraffah</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rujing</namePart>
<namePart type="family">Yao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jinhong</namePart>
<namePart type="family">Yu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhimin</namePart>
<namePart type="family">Tao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaozhong</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2025</title>
</titleInfo>
<name type="personal">
<namePart type="given">Christos</namePart>
<namePart type="family">Christodoulopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tanmoy</namePart>
<namePart type="family">Chakraborty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carolyn</namePart>
<namePart type="family">Rose</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Violet</namePart>
<namePart type="family">Peng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-335-7</identifier>
</relatedItem>
<abstract>Large Language Models (LLMs) have demonstrated an impressive level of general knowledge. However, they often struggle in highly specialized and sensitive domains such as drug discovery and rare disease research due to the lack of expert knowledge, which is often costly to obtain. In this paper, we propose a novel framework (PU-ADKA) designed to efficiently enhance domain-specific LLMs by actively engaging domain experts within a fixed budget. Unlike traditional fine-tuning approaches, PU-ADKA proactively identifies and queries the most appropriate expert from a team, taking into account each expert’s availability, competency, knowledge boundaries, and consultation cost. We train PU-ADKA using simulations on PubMed publication data and validate it through domain expert interactions, showing promising improvements in LLM domain knowledge acquisition. Furthermore, our experiments with a real-world drug development team validate that PU-ADKA can significantly enhance LLM performance in specialized domains while adhering to strict budget constraints. In addition to outlining our methodological innovations and experimental results, we release a new benchmark dataset, CKAD, for cost-effective LLM domain knowledge acquisition to foster further research in this challenging area.</abstract>
<identifier type="citekey">wu-etal-2025-active</identifier>
<location>
<url>https://aclanthology.org/2025.findings-emnlp.713/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>13244</start>
<end>13257</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Active Domain Knowledge Acquisition with 100-Dollar Budget: Enhancing LLMs via Cost-Efficient, Expert-Involved Interaction in Sensitive Domains
%A Wu, Yang
%A Moraffah, Raha
%A Yao, Rujing
%A Yu, Jinhong
%A Tao, Zhimin
%A Liu, Xiaozhong
%Y Christodoulopoulos, Christos
%Y Chakraborty, Tanmoy
%Y Rose, Carolyn
%Y Peng, Violet
%S Findings of the Association for Computational Linguistics: EMNLP 2025
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-335-7
%F wu-etal-2025-active
%X Large Language Models (LLMs) have demonstrated an impressive level of general knowledge. However, they often struggle in highly specialized and sensitive domains such as drug discovery and rare disease research due to the lack of expert knowledge, which is often costly to obtain. In this paper, we propose a novel framework (PU-ADKA) designed to efficiently enhance domain-specific LLMs by actively engaging domain experts within a fixed budget. Unlike traditional fine-tuning approaches, PU-ADKA proactively identifies and queries the most appropriate expert from a team, taking into account each expert’s availability, competency, knowledge boundaries, and consultation cost. We train PU-ADKA using simulations on PubMed publication data and validate it through domain expert interactions, showing promising improvements in LLM domain knowledge acquisition. Furthermore, our experiments with a real-world drug development team validate that PU-ADKA can significantly enhance LLM performance in specialized domains while adhering to strict budget constraints. In addition to outlining our methodological innovations and experimental results, we release a new benchmark dataset, CKAD, for cost-effective LLM domain knowledge acquisition to foster further research in this challenging area.
%U https://aclanthology.org/2025.findings-emnlp.713/
%P 13244-13257
Markdown (Informal)
[Active Domain Knowledge Acquisition with 100-Dollar Budget: Enhancing LLMs via Cost-Efficient, Expert-Involved Interaction in Sensitive Domains](https://aclanthology.org/2025.findings-emnlp.713/) (Wu et al., Findings 2025)
ACL