@inproceedings{amara-etal-2024-large,
title = "Large Language Models Provide Human-Level Medical Text Snippet Labeling",
author = "Amara, Ibtihel and
Yu, Haiyang and
Zhang, Fan and
Liu, Yuchen and
Li, Benny and
Liu, Chang and
Kartha, Rupesh and
Goel, Akshay",
editor = "Naumann, Tristan and
Ben Abacha, Asma and
Bethard, Steven and
Roberts, Kirk and
Bitterman, Danielle",
booktitle = "Proceedings of the 6th Clinical Natural Language Processing Workshop",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.clinicalnlp-1.15",
doi = "10.18653/v1/2024.clinicalnlp-1.15",
pages = "185--195",
abstract = "This study evaluates the proficiency of Large Language Models (LLMs) in accurately labeling clinical document excerpts. Our focus is on the assignment of potential or confirmed diagnoses and medical procedures to snippets of medical text sourced from unstructured clinical patient records. We explore how the performance of LLMs compare against human annotators in classifying these excerpts. Employing a few-shot, chain-of-thought prompting approach with the MIMIC-III dataset, Med-PaLM 2 showcases annotation accuracy comparable to human annotators, achieving a notable precision rate of approximately 92{\%} relative to the gold standard labels established by human experts.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="amara-etal-2024-large">
<titleInfo>
<title>Large Language Models Provide Human-Level Medical Text Snippet Labeling</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ibtihel</namePart>
<namePart type="family">Amara</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Haiyang</namePart>
<namePart type="family">Yu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fan</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuchen</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Benny</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chang</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rupesh</namePart>
<namePart type="family">Kartha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Akshay</namePart>
<namePart type="family">Goel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 6th Clinical Natural Language Processing Workshop</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tristan</namePart>
<namePart type="family">Naumann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asma</namePart>
<namePart type="family">Ben Abacha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Bethard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kirk</namePart>
<namePart type="family">Roberts</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Danielle</namePart>
<namePart type="family">Bitterman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mexico City, Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This study evaluates the proficiency of Large Language Models (LLMs) in accurately labeling clinical document excerpts. Our focus is on the assignment of potential or confirmed diagnoses and medical procedures to snippets of medical text sourced from unstructured clinical patient records. We explore how the performance of LLMs compare against human annotators in classifying these excerpts. Employing a few-shot, chain-of-thought prompting approach with the MIMIC-III dataset, Med-PaLM 2 showcases annotation accuracy comparable to human annotators, achieving a notable precision rate of approximately 92% relative to the gold standard labels established by human experts.</abstract>
<identifier type="citekey">amara-etal-2024-large</identifier>
<identifier type="doi">10.18653/v1/2024.clinicalnlp-1.15</identifier>
<location>
<url>https://aclanthology.org/2024.clinicalnlp-1.15</url>
</location>
<part>
<date>2024-06</date>
<extent unit="page">
<start>185</start>
<end>195</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Large Language Models Provide Human-Level Medical Text Snippet Labeling
%A Amara, Ibtihel
%A Yu, Haiyang
%A Zhang, Fan
%A Liu, Yuchen
%A Li, Benny
%A Liu, Chang
%A Kartha, Rupesh
%A Goel, Akshay
%Y Naumann, Tristan
%Y Ben Abacha, Asma
%Y Bethard, Steven
%Y Roberts, Kirk
%Y Bitterman, Danielle
%S Proceedings of the 6th Clinical Natural Language Processing Workshop
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F amara-etal-2024-large
%X This study evaluates the proficiency of Large Language Models (LLMs) in accurately labeling clinical document excerpts. Our focus is on the assignment of potential or confirmed diagnoses and medical procedures to snippets of medical text sourced from unstructured clinical patient records. We explore how the performance of LLMs compare against human annotators in classifying these excerpts. Employing a few-shot, chain-of-thought prompting approach with the MIMIC-III dataset, Med-PaLM 2 showcases annotation accuracy comparable to human annotators, achieving a notable precision rate of approximately 92% relative to the gold standard labels established by human experts.
%R 10.18653/v1/2024.clinicalnlp-1.15
%U https://aclanthology.org/2024.clinicalnlp-1.15
%U https://doi.org/10.18653/v1/2024.clinicalnlp-1.15
%P 185-195
Markdown (Informal)
[Large Language Models Provide Human-Level Medical Text Snippet Labeling](https://aclanthology.org/2024.clinicalnlp-1.15) (Amara et al., ClinicalNLP-WS 2024)
ACL
- Ibtihel Amara, Haiyang Yu, Fan Zhang, Yuchen Liu, Benny Li, Chang Liu, Rupesh Kartha, and Akshay Goel. 2024. Large Language Models Provide Human-Level Medical Text Snippet Labeling. In Proceedings of the 6th Clinical Natural Language Processing Workshop, pages 185–195, Mexico City, Mexico. Association for Computational Linguistics.