@comment{Conference-paper record for the BioNLP workshop proceedings entry below; the same reference follows in MODS XML and EndNote form.}
@inproceedings{ferrazzi-etal-2025-converting,
  title     = {Converting Annotated Clinical Cases into Structured Case Report Forms},
  author    = {Ferrazzi, Pietro and
               Lavelli, Alberto and
               Magnini, Bernardo},
  editor    = {Demner-Fushman, Dina and
               Ananiadou, Sophia and
               Miwa, Makoto and
               Tsujii, Junichi},
  booktitle = {Proceedings of the 24th Workshop on Biomedical Language Processing},
  month     = aug,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.bionlp-1.26/},
  doi       = {10.18653/v1/2025.bionlp-1.26},
  pages     = {307--318},
  isbn      = {979-8-89176-275-6},
  abstract  = {Case Report Forms (CRFs) are largely used in medical research as they ensure accuracy, reliability, and validity of results in clinical studies. However, publicly available, well-annotated CRF datasets are scarce, limiting the development of CRF slot filling systems able to fill in a CRF from clinical notes. To mitigate the scarcity of CRF datasets, we propose to take advantage of available datasets annotated for information extraction tasks and to convert them into structured CRFs. We present a semi-automatic conversion methodology, which has been applied to the E3C dataset in two languages (English and Italian), resulting in a new, high-quality dataset for CRF slot filling. Through several experiments on the created dataset, we report that slot filling achieves 59.7{\%} for Italian and 67.3{\%} for English on a closed Large Language Models (zero-shot) and worse performances on three families of open-source models, showing that filling CRFs is challenging even for recent state-of-the-art LLMs.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ferrazzi-etal-2025-converting">
<titleInfo>
<title>Converting Annotated Clinical Cases into Structured Case Report Forms</title>
</titleInfo>
<name type="personal">
<namePart type="given">Pietro</namePart>
<namePart type="family">Ferrazzi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alberto</namePart>
<namePart type="family">Lavelli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bernardo</namePart>
<namePart type="family">Magnini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 24th Workshop on Biomedical Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dina</namePart>
<namePart type="family">Demner-Fushman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sophia</namePart>
<namePart type="family">Ananiadou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Makoto</namePart>
<namePart type="family">Miwa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junichi</namePart>
<namePart type="family">Tsujii</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-275-6</identifier>
</relatedItem>
<abstract>Case Report Forms (CRFs) are largely used in medical research as they ensure accuracy, reliability, and validity of results in clinical studies. However, publicly available, well-annotated CRF datasets are scarce, limiting the development of CRF slot filling systems able to fill in a CRF from clinical notes. To mitigate the scarcity of CRF datasets, we propose to take advantage of available datasets annotated for information extraction tasks and to convert them into structured CRFs. We present a semi-automatic conversion methodology, which has been applied to the E3C dataset in two languages (English and Italian), resulting in a new, high-quality dataset for CRF slot filling. Through several experiments on the created dataset, we report that slot filling achieves 59.7% for Italian and 67.3% for English on a closed Large Language Models (zero-shot) and worse performances on three families of open-source models, showing that filling CRFs is challenging even for recent state-of-the-art LLMs.</abstract>
<identifier type="citekey">ferrazzi-etal-2025-converting</identifier>
<identifier type="doi">10.18653/v1/2025.bionlp-1.26</identifier>
<location>
<url>https://aclanthology.org/2025.bionlp-1.26/</url>
</location>
<part>
<date>2025-08</date>
<extent unit="page">
<start>307</start>
<end>318</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Converting Annotated Clinical Cases into Structured Case Report Forms
%A Ferrazzi, Pietro
%A Lavelli, Alberto
%A Magnini, Bernardo
%Y Demner-Fushman, Dina
%Y Ananiadou, Sophia
%Y Miwa, Makoto
%Y Tsujii, Junichi
%S Proceedings of the 24th Workshop on Biomedical Language Processing
%D 2025
%8 August
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-275-6
%F ferrazzi-etal-2025-converting
%X Case Report Forms (CRFs) are largely used in medical research as they ensure accuracy, reliability, and validity of results in clinical studies. However, publicly available, well-annotated CRF datasets are scarce, limiting the development of CRF slot filling systems able to fill in a CRF from clinical notes. To mitigate the scarcity of CRF datasets, we propose to take advantage of available datasets annotated for information extraction tasks and to convert them into structured CRFs. We present a semi-automatic conversion methodology, which has been applied to the E3C dataset in two languages (English and Italian), resulting in a new, high-quality dataset for CRF slot filling. Through several experiments on the created dataset, we report that slot filling achieves 59.7% for Italian and 67.3% for English on a closed Large Language Models (zero-shot) and worse performances on three families of open-source models, showing that filling CRFs is challenging even for recent state-of-the-art LLMs.
%R 10.18653/v1/2025.bionlp-1.26
%U https://aclanthology.org/2025.bionlp-1.26/
%U https://doi.org/10.18653/v1/2025.bionlp-1.26
%P 307-318
Markdown (Informal)
[Converting Annotated Clinical Cases into Structured Case Report Forms](https://aclanthology.org/2025.bionlp-1.26/) (Ferrazzi et al., BioNLP 2025)
ACL