% Conference paper from the ICON 2024 proceedings (ACL Anthology ID 2024.icon-1.4).
@inproceedings{c-etal-2024-improving,
  author    = {C, Meethu Mohan and
               Punnan, Sneha Shaji and
               Kleenankandy, Jeena},
  title     = {Improving Few-shot Prompting using Cluster-based Sample Retrieval for Medical {NER} in Clinical Text},
  editor    = {Lalitha Devi, Sobha and
               Arora, Karunesh},
  booktitle = {Proceedings of the 21st International Conference on Natural Language Processing (ICON)},
  month     = dec,
  year      = {2024},
  address   = {AU-KBC Research Centre, Chennai, India},
  publisher = {NLP Association of India (NLPAI)},
  pages     = {37--44},
  url       = {https://aclanthology.org/2024.icon-1.4/},
  abstract  = {Named Entity Recognition (NER) in the medical domain is crucial for extracting essential information from clinical text. Large Language Models (LLMs) have demonstrated remarkable capabilities in this task, but their performance is highly dependent on the quality of the prompts. Few-shot prompting or prompt-by-example, where the input query to LLM is augmented with one or more sample outputs, is a well-known technique in guiding the LLMs to the expected result. The quality of the sample in the prompt plays an important role in this task. This paper proposes to improve the performance of few-shot prompting for medical NER on clinical text using a cluster-based strategy for sample selection. We employ the concepts from Retrieval Augmented Generation (RAG) and K-means clustering to identify the most similar annotated examples for any given input text. Using these contextually relevant yet divergent training samples as examples, we guide the LLM toward extracting more accurate medical entities. Our experiments using the llama-2 model show that this approach significantly outperforms zero-shot prompting and random sampled few-shot prompting in two data sets chosen for this study, demonstrating the efficacy of cluster-based retrieval in improving few-shot prompting for medical NER tasks.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the paper with citekey c-etal-2024-improving. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="c-etal-2024-improving">
    <titleInfo>
      <title>Improving Few-shot Prompting using Cluster-based Sample Retrieval for Medical NER in Clinical Text</title>
    </titleInfo>
    <!-- Authors, in the order they are listed on the paper. -->
    <name type="personal">
      <namePart type="given">Meethu</namePart>
      <namePart type="given">Mohan</namePart>
      <namePart type="family">C</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sneha</namePart>
      <namePart type="given">Shaji</namePart>
      <namePart type="family">Punnan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jeena</namePart>
      <namePart type="family">Kleenankandy</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors and publisher. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 21st International Conference on Natural Language Processing (ICON)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Sobha</namePart>
        <namePart type="family">Lalitha Devi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Karunesh</namePart>
        <namePart type="family">Arora</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>NLP Association of India (NLPAI)</publisher>
        <place>
          <placeTerm type="text">AU-KBC Research Centre, Chennai, India</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Named Entity Recognition (NER) in the medical domain is crucial for extracting essential information from clinical text. Large Language Models (LLMs) have demonstrated remarkable capabilities in this task, but their performance is highly dependent on the quality of the prompts. Few-shot prompting or prompt-by-example, where the input query to LLM is augmented with one or more sample outputs, is a well-known technique in guiding the LLMs to the expected result. The quality of the sample in the prompt plays an important role in this task. This paper proposes to improve the performance of few-shot prompting for medical NER on clinical text using a cluster-based strategy for sample selection. We employ the concepts from Retrieval Augmented Generation (RAG) and K-means clustering to identify the most similar annotated examples for any given input text. Using these contextually relevant yet divergent training samples as examples, we guide the LLM toward extracting more accurate medical entities. Our experiments using the llama-2 model show that this approach significantly outperforms zero-shot prompting and random sampled few-shot prompting in two data sets chosen for this study, demonstrating the efficacy of cluster-based retrieval in improving few-shot prompting for medical NER tasks.</abstract>
    <identifier type="citekey">c-etal-2024-improving</identifier>
    <location>
      <url>https://aclanthology.org/2024.icon-1.4/</url>
    </location>
    <!-- Position of the paper within the host volume: issue date and page range. -->
    <part>
      <date>2024-12</date>
      <extent unit="page">
        <start>37</start>
        <end>44</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Improving Few-shot Prompting using Cluster-based Sample Retrieval for Medical NER in Clinical Text
%A C, Meethu Mohan
%A Punnan, Sneha Shaji
%A Kleenankandy, Jeena
%Y Lalitha Devi, Sobha
%Y Arora, Karunesh
%S Proceedings of the 21st International Conference on Natural Language Processing (ICON)
%D 2024
%8 December
%I NLP Association of India (NLPAI)
%C AU-KBC Research Centre, Chennai, India
%F c-etal-2024-improving
%X Named Entity Recognition (NER) in the medical domain is crucial for extracting essential information from clinical text. Large Language Models (LLMs) have demonstrated remarkable capabilities in this task, but their performance is highly dependent on the quality of the prompts. Few-shot prompting or prompt-by-example, where the input query to LLM is augmented with one or more sample outputs, is a well-known technique in guiding the LLMs to the expected result. The quality of the sample in the prompt plays an important role in this task. This paper proposes to improve the performance of few-shot prompting for medical NER on clinical text using a cluster-based strategy for sample selection. We employ the concepts from Retrieval Augmented Generation (RAG) and K-means clustering to identify the most similar annotated examples for any given input text. Using these contextually relevant yet divergent training samples as examples, we guide the LLM toward extracting more accurate medical entities. Our experiments using the llama-2 model show that this approach significantly outperforms zero-shot prompting and random sampled few-shot prompting in two data sets chosen for this study, demonstrating the efficacy of cluster-based retrieval in improving few-shot prompting for medical NER tasks.
%U https://aclanthology.org/2024.icon-1.4/
%P 37-44
Markdown (Informal)
[Improving Few-shot Prompting using Cluster-based Sample Retrieval for Medical NER in Clinical Text](https://aclanthology.org/2024.icon-1.4/) (C et al., ICON 2024)
ACL