% Conference paper record: one inproceedings entry for a paper in the
% AIME-Con 2025 "Full Papers" proceedings volume.
% NOTE(review): the address field appears to hold the conference venue rather
% than the publisher's city -- confirm before relying on it for a publisher location.
@inproceedings{li-etal-2025-towards-reliable,
  title     = {Towards Reliable Generation of Clinical Chart Items: A Counterfactual Reasoning Approach with Large Language Models},
  author    = {Li, Jiaxuan and
               Rezayi, Saed and
               Baldwin, Peter and
               Harik, Polina and
               Yaneva, Victoria},
  editor    = {Wilson, Joshua and
               Ormerod, Christopher and
               Beiting Parrish, Magdalen},
  booktitle = {Proceedings of the Artificial Intelligence in Measurement and Education Conference ({AIME-Con}): Full Papers},
  month     = oct,
  year      = {2025},
  address   = {Wyndham Grand Pittsburgh, Downtown, Pittsburgh, Pennsylvania, United States},
  publisher = {National Council on Measurement in Education (NCME)},
  url       = {https://aclanthology.org/2025.aimecon-main.16/},
  pages     = {142--153},
  isbn      = {979-8-218-84228-4},
  abstract  = {This study explores GPT-4 for generating clinical chart items in medical education using three prompting strategies. Expert evaluations found many items usable or promising. The counterfactual approach enhanced novelty, and item quality improved with high-surprisal examples. This is the first investigation of LLMs for automated clinical chart item generation.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record; the ID matches the citekey identifier further down. -->
  <mods ID="li-etal-2025-towards-reliable">
    <titleInfo>
      <title>Towards Reliable Generation of Clinical Chart Items: A Counterfactual Reasoning Approach with Large Language Models</title>
    </titleInfo>

    <!-- Authors of the paper -->
    <name type="personal">
      <namePart type="given">Jiaxuan</namePart>
      <namePart type="family">Li</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Saed</namePart>
      <namePart type="family">Rezayi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Peter</namePart>
      <namePart type="family">Baldwin</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Polina</namePart>
      <namePart type="family">Harik</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Victoria</namePart>
      <namePart type="family">Yaneva</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>

    <originInfo>
      <dateIssued>2025-10</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume, with its editors, publisher, place and ISBN -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Artificial Intelligence in Measurement and Education Conference (AIME-Con): Full Papers</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Joshua</namePart>
        <namePart type="family">Wilson</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Christopher</namePart>
        <namePart type="family">Ormerod</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Magdalen</namePart>
        <namePart type="family">Beiting Parrish</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>National Council on Measurement in Education (NCME)</publisher>
        <place>
          <placeTerm type="text">Wyndham Grand Pittsburgh, Downtown, Pittsburgh, Pennsylvania, United States</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-218-84228-4</identifier>
    </relatedItem>

    <abstract>This study explores GPT-4 for generating clinical chart items in medical education using three prompting strategies. Expert evaluations found many items usable or promising. The counterfactual approach enhanced novelty, and item quality improved with high-surprisal examples. This is the first investigation of LLMs for automated clinical chart item generation.</abstract>
    <identifier type="citekey">li-etal-2025-towards-reliable</identifier>
    <location>
      <url>https://aclanthology.org/2025.aimecon-main.16/</url>
    </location>

    <!-- Page range of the paper within the host volume -->
    <part>
      <date>2025-10</date>
      <extent unit="page">
        <start>142</start>
        <end>153</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Towards Reliable Generation of Clinical Chart Items: A Counterfactual Reasoning Approach with Large Language Models
%A Li, Jiaxuan
%A Rezayi, Saed
%A Baldwin, Peter
%A Harik, Polina
%A Yaneva, Victoria
%Y Wilson, Joshua
%Y Ormerod, Christopher
%Y Beiting Parrish, Magdalen
%S Proceedings of the Artificial Intelligence in Measurement and Education Conference (AIME-Con): Full Papers
%D 2025
%8 October
%I National Council on Measurement in Education (NCME)
%C Wyndham Grand Pittsburgh, Downtown, Pittsburgh, Pennsylvania, United States
%@ 979-8-218-84228-4
%F li-etal-2025-towards-reliable
%X This study explores GPT-4 for generating clinical chart items in medical education using three prompting strategies. Expert evaluations found many items usable or promising. The counterfactual approach enhanced novelty, and item quality improved with high-surprisal examples. This is the first investigation of LLMs for automated clinical chart item generation.
%U https://aclanthology.org/2025.aimecon-main.16/
%P 142-153
Markdown (Informal)
[Towards Reliable Generation of Clinical Chart Items: A Counterfactual Reasoning Approach with Large Language Models](https://aclanthology.org/2025.aimecon-main.16/) (Li et al., AIME-Con 2025)
ACL
- Jiaxuan Li, Saed Rezayi, Peter Baldwin, Polina Harik, and Victoria Yaneva. 2025. Towards Reliable Generation of Clinical Chart Items: A Counterfactual Reasoning Approach with Large Language Models. In Proceedings of the Artificial Intelligence in Measurement and Education Conference (AIME-Con): Full Papers, pages 142–153, Wyndham Grand Pittsburgh, Downtown, Pittsburgh, Pennsylvania, United States. National Council on Measurement in Education (NCME).