@inproceedings{farzi-etal-2024-get,
title = "Get the Best out of 1{B} {LLM}s: Insights from Information Extraction on Clinical Documents",
author = "Farzi, Saeed and
Ghosh, Soumitra and
Lavelli, Alberto and
Magnini, Bernardo",
editor = "Demner-Fushman, Dina and
Ananiadou, Sophia and
Miwa, Makoto and
Roberts, Kirk and
Tsujii, Junichi",
booktitle = "Proceedings of the 23rd Workshop on Biomedical Natural Language Processing",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.bionlp-1.21",
doi = "10.18653/v1/2024.bionlp-1.21",
pages = "266--276",
abstract = "While the popularity of large, versatile language models like ChatGPT continues to rise, the landscape shifts when considering open-source models tailored to specific domains. Moreover, many areas, such as clinical documents, suffer from a scarcity of training data, often amounting to only a few hundred instances. Additionally, in certain settings, such as hospitals, cloud-based solutions pose privacy concerns, necessitating the deployment of language models on traditional hardware, such as single GPUs or powerful CPUs. To address these complexities, we conduct extensive experiments on both clinical entity detection and relation extraction in clinical documents using 1B parameter models. Our study delves into traditional fine-tuning, continuous pre-training in the medical domain, and instruction-tuning methods, providing valuable insights into their effectiveness in a multilingual setting. Our results underscore the importance of domain-specific models and pre-training for clinical natural language processing tasks. Furthermore, data augmentation using cross-lingual information improves performance in most cases, highlighting the potential for multilingual enhancements.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="farzi-etal-2024-get">
<titleInfo>
<title>Get the Best out of 1B LLMs: Insights from Information Extraction on Clinical Documents</title>
</titleInfo>
<name type="personal">
<namePart type="given">Saeed</namePart>
<namePart type="family">Farzi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Soumitra</namePart>
<namePart type="family">Ghosh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alberto</namePart>
<namePart type="family">Lavelli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bernardo</namePart>
<namePart type="family">Magnini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 23rd Workshop on Biomedical Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dina</namePart>
<namePart type="family">Demner-Fushman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sophia</namePart>
<namePart type="family">Ananiadou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Makoto</namePart>
<namePart type="family">Miwa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kirk</namePart>
<namePart type="family">Roberts</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junichi</namePart>
<namePart type="family">Tsujii</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>While the popularity of large, versatile language models like ChatGPT continues to rise, the landscape shifts when considering open-source models tailored to specific domains. Moreover, many areas, such as clinical documents, suffer from a scarcity of training data, often amounting to only a few hundred instances. Additionally, in certain settings, such as hospitals, cloud-based solutions pose privacy concerns, necessitating the deployment of language models on traditional hardware, such as single GPUs or powerful CPUs. To address these complexities, we conduct extensive experiments on both clinical entity detection and relation extraction in clinical documents using 1B parameter models. Our study delves into traditional fine-tuning, continuous pre-training in the medical domain, and instruction-tuning methods, providing valuable insights into their effectiveness in a multilingual setting. Our results underscore the importance of domain-specific models and pre-training for clinical natural language processing tasks. Furthermore, data augmentation using cross-lingual information improves performance in most cases, highlighting the potential for multilingual enhancements.</abstract>
<identifier type="citekey">farzi-etal-2024-get</identifier>
<identifier type="doi">10.18653/v1/2024.bionlp-1.21</identifier>
<location>
<url>https://aclanthology.org/2024.bionlp-1.21</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>266</start>
<end>276</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Get the Best out of 1B LLMs: Insights from Information Extraction on Clinical Documents
%A Farzi, Saeed
%A Ghosh, Soumitra
%A Lavelli, Alberto
%A Magnini, Bernardo
%Y Demner-Fushman, Dina
%Y Ananiadou, Sophia
%Y Miwa, Makoto
%Y Roberts, Kirk
%Y Tsujii, Junichi
%S Proceedings of the 23rd Workshop on Biomedical Natural Language Processing
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F farzi-etal-2024-get
%X While the popularity of large, versatile language models like ChatGPT continues to rise, the landscape shifts when considering open-source models tailored to specific domains. Moreover, many areas, such as clinical documents, suffer from a scarcity of training data, often amounting to only a few hundred instances. Additionally, in certain settings, such as hospitals, cloud-based solutions pose privacy concerns, necessitating the deployment of language models on traditional hardware, such as single GPUs or powerful CPUs. To address these complexities, we conduct extensive experiments on both clinical entity detection and relation extraction in clinical documents using 1B parameter models. Our study delves into traditional fine-tuning, continuous pre-training in the medical domain, and instruction-tuning methods, providing valuable insights into their effectiveness in a multilingual setting. Our results underscore the importance of domain-specific models and pre-training for clinical natural language processing tasks. Furthermore, data augmentation using cross-lingual information improves performance in most cases, highlighting the potential for multilingual enhancements.
%R 10.18653/v1/2024.bionlp-1.21
%U https://aclanthology.org/2024.bionlp-1.21
%U https://doi.org/10.18653/v1/2024.bionlp-1.21
%P 266-276
Markdown (Informal)
[Get the Best out of 1B LLMs: Insights from Information Extraction on Clinical Documents](https://aclanthology.org/2024.bionlp-1.21) (Farzi et al., BioNLP-WS 2024)
ACL