@inproceedings{ding-etal-2026-promed,
title = "{P}ro{M}ed: Shapley Information Gain Guided Reinforcement Learning for Proactive Medical {LLM}s",
author = "Ding, Hongxin and
Huang, Baixiang and
Fang, Yue and
Liao, Weibin and
Jiang, Xinke and
Zhang, Jinyang and
Zhu, Yinghao and
Li, Zheng and
Ma, Liantao and
Zhao, Junfeng and
Wang, Yasha",
editor = "Liakata, Maria and
Moreira, Viviane P. and
Zhang, Jiajun and
Jurgens, David",
booktitle = "Proceedings of the 64th Annual Meeting of the {A}ssociation for {C}omputational {L}inguistics (Volume 1: Long Papers)",
month = jul,
year = "2026",
address = "San Diego, California, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.acl-long.1500/",
pages = "32481--32515",
ISBN = "979-8-89176-390-6",
abstract = "Interactive medical questioning is essential in clinical consultations, where physicians must actively gather necessary patient information. Yet existing medical Large Language Models (LLMs) predominantly follow a reactive paradigm, risking diagnostic errors by answering before seeking sufficient details. To bridge this gap, we propose ProMed, a reinforcement learning framework that transitions LLMs toward a proactive paradigm, enabling them to ask clinically valuable questions before decision-making. Central to ProMed is the Shapley Information Gain (SIG) reward, which quantifies a question{'}s clinical utility as the amount of newly acquired information, while considering its contextual importance via Shapley values. We integrate SIG into a two-stage training pipeline: (1) SIG-Guided Model Initialization uses Monte Carlo Tree Search to construct high-reward interaction trajectories for supervision, and (2) SIG-Augmented Policy Optimization, with a novel SIG-guided Reward Distribution Mechanism that prioritizes informative questions for fine-grained optimization. Experiments on partial-information medical benchmarks show that ProMed significantly outperforms state-of-the-art methods by 6.29{\%} on average and delivers a 54.45{\%} gain over the reactive paradigm, and generalizes robustly to out-of-domain cases. Our codes are available at https://github.com/hxxding/ProMed."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ding-etal-2026-promed">
<titleInfo>
<title>ProMed: Shapley Information Gain Guided Reinforcement Learning for Proactive Medical LLMs</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hongxin</namePart>
<namePart type="family">Ding</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Baixiang</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yue</namePart>
<namePart type="family">Fang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Weibin</namePart>
<namePart type="family">Liao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xinke</namePart>
<namePart type="family">Jiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jinyang</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yinghao</namePart>
<namePart type="family">Zhu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zheng</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Liantao</namePart>
<namePart type="family">Ma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junfeng</namePart>
<namePart type="family">Zhao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yasha</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Liakata</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Viviane</namePart>
<namePart type="given">P</namePart>
<namePart type="family">Moreira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiajun</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Jurgens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-390-6</identifier>
</relatedItem>
<abstract>Interactive medical questioning is essential in clinical consultations, where physicians must actively gather necessary patient information. Yet existing medical Large Language Models (LLMs) predominantly follow a reactive paradigm, risking diagnostic errors by answering before seeking sufficient details. To bridge this gap, we propose ProMed, a reinforcement learning framework that transitions LLMs toward a proactive paradigm, enabling them to ask clinically valuable questions before decision-making. Central to ProMed is the Shapley Information Gain (SIG) reward, which quantifies a question’s clinical utility as the amount of newly acquired information, while considering its contextual importance via Shapley values. We integrate SIG into a two-stage training pipeline: (1) SIG-Guided Model Initialization uses Monte Carlo Tree Search to construct high-reward interaction trajectories for supervision, and (2) SIG-Augmented Policy Optimization, with a novel SIG-guided Reward Distribution Mechanism that prioritizes informative questions for fine-grained optimization. Experiments on partial-information medical benchmarks show that ProMed significantly outperforms state-of-the-art methods by 6.29% on average and delivers a 54.45% gain over the reactive paradigm, and generalizes robustly to out-of-domain cases. Our codes are available at https://github.com/hxxding/ProMed.</abstract>
<identifier type="citekey">ding-etal-2026-promed</identifier>
<location>
<url>https://aclanthology.org/2026.acl-long.1500/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>32481</start>
<end>32515</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T ProMed: Shapley Information Gain Guided Reinforcement Learning for Proactive Medical LLMs
%A Ding, Hongxin
%A Huang, Baixiang
%A Fang, Yue
%A Liao, Weibin
%A Jiang, Xinke
%A Zhang, Jinyang
%A Zhu, Yinghao
%A Li, Zheng
%A Ma, Liantao
%A Zhao, Junfeng
%A Wang, Yasha
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-390-6
%F ding-etal-2026-promed
%X Interactive medical questioning is essential in clinical consultations, where physicians must actively gather necessary patient information. Yet existing medical Large Language Models (LLMs) predominantly follow a reactive paradigm, risking diagnostic errors by answering before seeking sufficient details. To bridge this gap, we propose ProMed, a reinforcement learning framework that transitions LLMs toward a proactive paradigm, enabling them to ask clinically valuable questions before decision-making. Central to ProMed is the Shapley Information Gain (SIG) reward, which quantifies a question’s clinical utility as the amount of newly acquired information, while considering its contextual importance via Shapley values. We integrate SIG into a two-stage training pipeline: (1) SIG-Guided Model Initialization uses Monte Carlo Tree Search to construct high-reward interaction trajectories for supervision, and (2) SIG-Augmented Policy Optimization, with a novel SIG-guided Reward Distribution Mechanism that prioritizes informative questions for fine-grained optimization. Experiments on partial-information medical benchmarks show that ProMed significantly outperforms state-of-the-art methods by 6.29% on average and delivers a 54.45% gain over the reactive paradigm, and generalizes robustly to out-of-domain cases. Our codes are available at https://github.com/hxxding/ProMed.
%U https://aclanthology.org/2026.acl-long.1500/
%P 32481-32515
Markdown (Informal)
[ProMed: Shapley Information Gain Guided Reinforcement Learning for Proactive Medical LLMs](https://aclanthology.org/2026.acl-long.1500/) (Ding et al., ACL 2026)
ACL
- Hongxin Ding, Baixiang Huang, Yue Fang, Weibin Liao, Xinke Jiang, Jinyang Zhang, Yinghao Zhu, Zheng Li, Liantao Ma, Junfeng Zhao, and Yasha Wang. 2026. ProMed: Shapley Information Gain Guided Reinforcement Learning for Proactive Medical LLMs. In Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 32481–32515, San Diego, California, United States. Association for Computational Linguistics.