@inproceedings{kodikara-verspoor-2024-lesser,
title = "Lesser the Shots, Higher the Hallucinations: Exploration of Genetic Information Extraction using Generative Large Language Models",
author = "Kodikara, Milindi and
Verspoor, Karin",
editor = "Baldwin, Tim and
Rodr{\'i}guez M{\'e}ndez, Sergio Jos{\'e} and
Kuo, Nicholas",
booktitle = "Proceedings of the 22nd Annual Workshop of the Australasian Language Technology Association",
month = dec,
year = "2024",
address = "Canberra, Australia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.alta-1.10/",
pages = "130--145",
abstract = "Organisation of information about genes, genetic variants, and associated diseases from vast quantities of scientific literature texts through automated information extraction (IE) strategies can facilitate progress in personalised medicine. We systematically evaluate the performance of generative large language models (LLMs) on the extraction of specialised genetic information, focusing on end-to-end IE encompassing both named entity recognition and relation extraction. We experiment across multilingual datasets with a range of instruction strategies, including zero-shot and few-shot prompting along with providing an annotation guideline. Optimal results are obtained with few-shot prompting. However, we also identify that generative LLMs failed to adhere to the instructions provided, leading to over-generation of entities and relations. We therefore carefully examine the effect of learning paradigms on the extent to which genetic entities are fabricated, and the limitations of exact matching to determine performance of the model."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kodikara-verspoor-2024-lesser">
<titleInfo>
<title>Lesser the Shots, Higher the Hallucinations: Exploration of Genetic Information Extraction using Generative Large Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Milindi</namePart>
<namePart type="family">Kodikara</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Karin</namePart>
<namePart type="family">Verspoor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 22nd Annual Workshop of the Australasian Language Technology Association</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tim</namePart>
<namePart type="family">Baldwin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sergio</namePart>
<namePart type="given">José</namePart>
<namePart type="family">Rodríguez Méndez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nicholas</namePart>
<namePart type="family">Kuo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Canberra, Australia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Organisation of information about genes, genetic variants, and associated diseases from vast quantities of scientific literature texts through automated information extraction (IE) strategies can facilitate progress in personalised medicine. We systematically evaluate the performance of generative large language models (LLMs) on the extraction of specialised genetic information, focusing on end-to-end IE encompassing both named entity recognition and relation extraction. We experiment across multilingual datasets with a range of instruction strategies, including zero-shot and few-shot prompting along with providing an annotation guideline. Optimal results are obtained with few-shot prompting. However, we also identify that generative LLMs failed to adhere to the instructions provided, leading to over-generation of entities and relations. We therefore carefully examine the effect of learning paradigms on the extent to which genetic entities are fabricated, and the limitations of exact matching to determine performance of the model.</abstract>
<identifier type="citekey">kodikara-verspoor-2024-lesser</identifier>
<location>
<url>https://aclanthology.org/2024.alta-1.10/</url>
</location>
<part>
<date>2024-12</date>
<extent unit="page">
<start>130</start>
<end>145</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Lesser the Shots, Higher the Hallucinations: Exploration of Genetic Information Extraction using Generative Large Language Models
%A Kodikara, Milindi
%A Verspoor, Karin
%Y Baldwin, Tim
%Y Rodríguez Méndez, Sergio José
%Y Kuo, Nicholas
%S Proceedings of the 22nd Annual Workshop of the Australasian Language Technology Association
%D 2024
%8 December
%I Association for Computational Linguistics
%C Canberra, Australia
%F kodikara-verspoor-2024-lesser
%X Organisation of information about genes, genetic variants, and associated diseases from vast quantities of scientific literature texts through automated information extraction (IE) strategies can facilitate progress in personalised medicine. We systematically evaluate the performance of generative large language models (LLMs) on the extraction of specialised genetic information, focusing on end-to-end IE encompassing both named entity recognition and relation extraction. We experiment across multilingual datasets with a range of instruction strategies, including zero-shot and few-shot prompting along with providing an annotation guideline. Optimal results are obtained with few-shot prompting. However, we also identify that generative LLMs failed to adhere to the instructions provided, leading to over-generation of entities and relations. We therefore carefully examine the effect of learning paradigms on the extent to which genetic entities are fabricated, and the limitations of exact matching to determine performance of the model.
%U https://aclanthology.org/2024.alta-1.10/
%P 130-145
Markdown (Informal)
[Lesser the Shots, Higher the Hallucinations: Exploration of Genetic Information Extraction using Generative Large Language Models](https://aclanthology.org/2024.alta-1.10/) (Kodikara & Verspoor, ALTA 2024)
ACL