@inproceedings{karim-uzuner-2025-masonnlp,
title = "{M}ason{NLP} at {MEDIQA}-{OE} 2025: Assessing Large Language Models for Structured Medical Order Extraction",
author = "Karim, A H M Rezaul and
Uzuner, Ozlem",
editor = "Ben Abacha, Asma and
Bethard, Steven and
Bitterman, Danielle and
Naumann, Tristan and
Roberts, Kirk",
booktitle = "Proceedings of the 7th Clinical Natural Language Processing Workshop",
month = oct,
year = "2025",
address = "Virtual",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.clinicalnlp-1.7/",
pages = "57--67",
abstract = "Medical order extraction is essential for structuring actionable clinical information, supporting decision-making, and enabling downstream applications such as documentation and workflow automation. Orders may be embedded in diverse sources, including electronic health records, discharge summaries, and multi-turn doctor{--}patient dialogues, and can span categories such as medications, laboratory tests, imaging studies, and follow-up actions. The MEDIQA-OE 2025 shared task focuses on extracting structured medical orders from extended conversational transcripts, requiring the identification of order type, description, reason, and provenance. We present the MasonNLP submission, which ranked 5th among 17 participating teams with 105 total submissions. Our approach uses a general-purpose, instruction-tuned LLaMA-4 17B model without domain-specific fine-tuning, guided by a single in-context example. This few-shot configuration achieved an average F1 score of 37.76, with notable improvements in reason and provenance accuracy. These results demonstrate that large, non-domain-specific LLMs, when paired with effective prompt engineering, can serve as strong, scalable baselines for specialized clinical NLP tasks."
}

<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="karim-uzuner-2025-masonnlp">
<titleInfo>
<title>MasonNLP at MEDIQA-OE 2025: Assessing Large Language Models for Structured Medical Order Extraction</title>
</titleInfo>
<name type="personal">
<namePart type="given">A</namePart>
<namePart type="given">H</namePart>
<namePart type="given">M</namePart>
<namePart type="given">Rezaul</namePart>
<namePart type="family">Karim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ozlem</namePart>
<namePart type="family">Uzuner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-10</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 7th Clinical Natural Language Processing Workshop</title>
</titleInfo>
<name type="personal">
<namePart type="given">Asma</namePart>
<namePart type="family">Ben Abacha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Bethard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Danielle</namePart>
<namePart type="family">Bitterman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tristan</namePart>
<namePart type="family">Naumann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kirk</namePart>
<namePart type="family">Roberts</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Virtual</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Medical order extraction is essential for structuring actionable clinical information, supporting decision-making, and enabling downstream applications such as documentation and workflow automation. Orders may be embedded in diverse sources, including electronic health records, discharge summaries, and multi-turn doctor–patient dialogues, and can span categories such as medications, laboratory tests, imaging studies, and follow-up actions. The MEDIQA-OE 2025 shared task focuses on extracting structured medical orders from extended conversational transcripts, requiring the identification of order type, description, reason, and provenance. We present the MasonNLP submission, which ranked 5th among 17 participating teams with 105 total submissions. Our approach uses a general-purpose, instruction-tuned LLaMA-4 17B model without domain-specific fine-tuning, guided by a single in-context example. This few-shot configuration achieved an average F1 score of 37.76, with notable improvements in reason and provenance accuracy. These results demonstrate that large, non-domain-specific LLMs, when paired with effective prompt engineering, can serve as strong, scalable baselines for specialized clinical NLP tasks.</abstract>
<identifier type="citekey">karim-uzuner-2025-masonnlp</identifier>
<location>
<url>https://aclanthology.org/2025.clinicalnlp-1.7/</url>
</location>
<part>
<date>2025-10</date>
<extent unit="page">
<start>57</start>
<end>67</end>
</extent>
</part>
</mods>
</modsCollection>

%0 Conference Proceedings
%T MasonNLP at MEDIQA-OE 2025: Assessing Large Language Models for Structured Medical Order Extraction
%A Karim, A. H. M. Rezaul
%A Uzuner, Ozlem
%Y Ben Abacha, Asma
%Y Bethard, Steven
%Y Bitterman, Danielle
%Y Naumann, Tristan
%Y Roberts, Kirk
%S Proceedings of the 7th Clinical Natural Language Processing Workshop
%D 2025
%8 October
%I Association for Computational Linguistics
%C Virtual
%F karim-uzuner-2025-masonnlp
%X Medical order extraction is essential for structuring actionable clinical information, supporting decision-making, and enabling downstream applications such as documentation and workflow automation. Orders may be embedded in diverse sources, including electronic health records, discharge summaries, and multi-turn doctor–patient dialogues, and can span categories such as medications, laboratory tests, imaging studies, and follow-up actions. The MEDIQA-OE 2025 shared task focuses on extracting structured medical orders from extended conversational transcripts, requiring the identification of order type, description, reason, and provenance. We present the MasonNLP submission, which ranked 5th among 17 participating teams with 105 total submissions. Our approach uses a general-purpose, instruction-tuned LLaMA-4 17B model without domain-specific fine-tuning, guided by a single in-context example. This few-shot configuration achieved an average F1 score of 37.76, with notable improvements in reason and provenance accuracy. These results demonstrate that large, non-domain-specific LLMs, when paired with effective prompt engineering, can serve as strong, scalable baselines for specialized clinical NLP tasks.
%U https://aclanthology.org/2025.clinicalnlp-1.7/
%P 57-67

Markdown (Informal)
[MasonNLP at MEDIQA-OE 2025: Assessing Large Language Models for Structured Medical Order Extraction](https://aclanthology.org/2025.clinicalnlp-1.7/) (Karim & Uzuner, ClinicalNLP 2025)