@inproceedings{bellucci-etal-2026-segmentation,
title = "Segmentation Matters: Exploring {LLM}-Based Strategies for Temporal Clinical Event Identification in Oncology Reports",
author = "Bellucci, Cristiano and
Madeddu, Francesco and
Iacomini, Chiara and
Masciocchi, Carlotta and
Patarnello, Stefano and
Bernaschi, Massimo and
Santoro, Mario and
Lilli, Livia",
editor = "Demner-Fushman, Dina and
Ananiadou, Sophia and
Roberts, Kirk and
Tsujii, Junichi",
booktitle = "{B}io{NLP} 2026",
month = jul,
year = "2026",
address = "San Diego, California",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.bionlp-1.47/",
pages = "595--604",
ISBN = "979-8-89176-434-7",
abstract = "Processing unstructured clinical narratives remains a major challenge in medical Natural Language Processing (NLP), particularly when critical information is embedded within lengthy and heterogeneous reports. Clinical notes often describe key diagnostic and therapeutic events through a verbose narrative, making automatic event identification difficult. In this work, we frame the identification of clinical events as a text segmentation task. We conduct a comparative study of three segmentation strategies applied to oncology reports: (i) a fully regex-based approach, (ii) a cascaded regex-LLM pipeline, and (iii) the same cascade architecture augmented with a recovery mechanism to mitigate LLM rephrasing. Segmentation quality is evaluated using complementary structural metrics (Pk, WindowDiff, Boundary Similarity, Segment Count Accuracy, and Text Overlap IoU), and its impact is also observed on downstream segment tagging, performed to identify the corresponding event type (e.g. surgery, biopsy, imaging, treatment, laboratory). The results demonstrate the high potential of LLM-based approaches, particularly in preserving semantic coherence within segments and generalization on new data sources. However, regex-based segmentation achieves higher performance according to structural segmentation metrics, also leading to better downstream clinical event identification. In general, these results highlight the critical role of context-adaptive high-quality segmentation strategies in the structuring of verbose clinical narratives and in the accurate identification of key patient events."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bellucci-etal-2026-segmentation">
<titleInfo>
<title>Segmentation Matters: Exploring LLM-Based Strategies for Temporal Clinical Event Identification in Oncology Reports</title>
</titleInfo>
<name type="personal">
<namePart type="given">Cristiano</namePart>
<namePart type="family">Bellucci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Francesco</namePart>
<namePart type="family">Madeddu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chiara</namePart>
<namePart type="family">Iacomini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carlotta</namePart>
<namePart type="family">Masciocchi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stefano</namePart>
<namePart type="family">Patarnello</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Massimo</namePart>
<namePart type="family">Bernaschi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mario</namePart>
<namePart type="family">Santoro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Livia</namePart>
<namePart type="family">Lilli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>BioNLP 2026</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dina</namePart>
<namePart type="family">Demner-Fushman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sophia</namePart>
<namePart type="family">Ananiadou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kirk</namePart>
<namePart type="family">Roberts</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junichi</namePart>
<namePart type="family">Tsujii</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-434-7</identifier>
</relatedItem>
<abstract>Processing unstructured clinical narratives remains a major challenge in medical Natural Language Processing (NLP), particularly when critical information is embedded within lengthy and heterogeneous reports. Clinical notes often describe key diagnostic and therapeutic events through a verbose narrative, making automatic event identification difficult. In this work, we frame the identification of clinical events as a text segmentation task. We conduct a comparative study of three segmentation strategies applied to oncology reports: (i) a fully regex-based approach, (ii) a cascaded regex-LLM pipeline, and (iii) the same cascade architecture augmented with a recovery mechanism to mitigate LLM rephrasing. Segmentation quality is evaluated using complementary structural metrics (Pk, WindowDiff, Boundary Similarity, Segment Count Accuracy, and Text Overlap IoU), and its impact is also observed on downstream segment tagging, performed to identify the corresponding event type (e.g. surgery, biopsy, imaging, treatment, laboratory). The results demonstrate the high potential of LLM-based approaches, particularly in preserving semantic coherence within segments and generalization on new data sources. However, regex-based segmentation achieves higher performance according to structural segmentation metrics, also leading to better downstream clinical event identification. In general, these results highlight the critical role of context-adaptive high-quality segmentation strategies in the structuring of verbose clinical narratives and in the accurate identification of key patient events.</abstract>
<identifier type="citekey">bellucci-etal-2026-segmentation</identifier>
<location>
<url>https://aclanthology.org/2026.bionlp-1.47/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>595</start>
<end>604</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Segmentation Matters: Exploring LLM-Based Strategies for Temporal Clinical Event Identification in Oncology Reports
%A Bellucci, Cristiano
%A Madeddu, Francesco
%A Iacomini, Chiara
%A Masciocchi, Carlotta
%A Patarnello, Stefano
%A Bernaschi, Massimo
%A Santoro, Mario
%A Lilli, Livia
%Y Demner-Fushman, Dina
%Y Ananiadou, Sophia
%Y Roberts, Kirk
%Y Tsujii, Junichi
%S BioNLP 2026
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California
%@ 979-8-89176-434-7
%F bellucci-etal-2026-segmentation
%X Processing unstructured clinical narratives remains a major challenge in medical Natural Language Processing (NLP), particularly when critical information is embedded within lengthy and heterogeneous reports. Clinical notes often describe key diagnostic and therapeutic events through a verbose narrative, making automatic event identification difficult. In this work, we frame the identification of clinical events as a text segmentation task. We conduct a comparative study of three segmentation strategies applied to oncology reports: (i) a fully regex-based approach, (ii) a cascaded regex-LLM pipeline, and (iii) the same cascade architecture augmented with a recovery mechanism to mitigate LLM rephrasing. Segmentation quality is evaluated using complementary structural metrics (Pk, WindowDiff, Boundary Similarity, Segment Count Accuracy, and Text Overlap IoU), and its impact is also observed on downstream segment tagging, performed to identify the corresponding event type (e.g. surgery, biopsy, imaging, treatment, laboratory). The results demonstrate the high potential of LLM-based approaches, particularly in preserving semantic coherence within segments and generalization on new data sources. However, regex-based segmentation achieves higher performance according to structural segmentation metrics, also leading to better downstream clinical event identification. In general, these results highlight the critical role of context-adaptive high-quality segmentation strategies in the structuring of verbose clinical narratives and in the accurate identification of key patient events.
%U https://aclanthology.org/2026.bionlp-1.47/
%P 595-604
Markdown (Informal)
[Segmentation Matters: Exploring LLM-Based Strategies for Temporal Clinical Event Identification in Oncology Reports](https://aclanthology.org/2026.bionlp-1.47/) (Bellucci et al., BioNLP 2026)
ACL
- Cristiano Bellucci, Francesco Madeddu, Chiara Iacomini, Carlotta Masciocchi, Stefano Patarnello, Massimo Bernaschi, Mario Santoro, and Livia Lilli. 2026. Segmentation Matters: Exploring LLM-Based Strategies for Temporal Clinical Event Identification in Oncology Reports. In BioNLP 2026, pages 595–604, San Diego, California. Association for Computational Linguistics.