% BibTeX record for ACL Anthology paper 2026.bionlp-1.3 (BioNLP 2026, pages 20--30).
@inproceedings{barlow-etal-2026-using,
    title     = {Using Synthetic Records to Improve Automated Identification of Seizure Freedom in Clinical Text about People with Epilepsy},
    author    = {Barlow, Stephen and
                 Gan, Yujian and
                 Davies, Joe and
                 Winston, Joel and
                 Teo, James and
                 Richardson, Mark and
                 Holgate, Ben},
    editor    = {Demner-Fushman, Dina and
                 Ananiadou, Sophia and
                 Roberts, Kirk and
                 Tsujii, Junichi},
    booktitle = {{BioNLP} 2026},
    month     = jul,
    year      = {2026},
    address   = {San Diego, California},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2026.bionlp-1.3/},
    pages     = {20--30},
    isbn      = {979-8-89176-434-7},
    abstract  = {Seizure freedom is a key clinical outcome for people with epilepsy (PWE) yet it is primarily recorded in free-text notes and letters in the United Kingdom, making it difficult to aggregate and track at scale. This paper introduces a generative LLM-based pipeline boosted by synthetic data to identify a PWE{'}s seizure freedom status in clinicians' records. We fine-tuned seven different LLMs with between 4-14 billion parameters using LoRA to compare models trained on synthetic records against those trained on expert annotated records. The best performing configuration, based on Qwen-2.5-14B, was trained entirely on synthetic records and used chain-of-thought (CoT) reasoning (both generated by GPT-5). This achieved an F1 score of 0.90{\ensuremath{\pm}}0.02 on double-annotated test data and outperformed the equivalent model trained on authentic clinician records, which achieved 0.87{\ensuremath{\pm}}0.04. The synthetically trained models also have the benefit of outputting their CoT reasoning process for greater decision-making transparency and can also make use of the unused supervised training data for significantly increased test examples. This work has implications for monitoring a key treatment outcome for PWE automatically and at scale.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for citekey barlow-etal-2026-using; the host relatedItem describes the BioNLP 2026 proceedings volume. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="barlow-etal-2026-using">
    <titleInfo>
      <title>Using Synthetic Records to Improve Automated Identification of Seizure Freedom in Clinical Text about People with Epilepsy</title>
    </titleInfo>
    <!-- Authors, in publication order. -->
    <name type="personal">
      <namePart type="given">Stephen</namePart>
      <namePart type="family">Barlow</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yujian</namePart>
      <namePart type="family">Gan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Joe</namePart>
      <namePart type="family">Davies</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Joel</namePart>
      <namePart type="family">Winston</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">James</namePart>
      <namePart type="family">Teo</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Mark</namePart>
      <namePart type="family">Richardson</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ben</namePart>
      <namePart type="family">Holgate</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher, place and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>BioNLP 2026</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Dina</namePart>
        <namePart type="family">Demner-Fushman</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sophia</namePart>
        <namePart type="family">Ananiadou</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Kirk</namePart>
        <namePart type="family">Roberts</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Junichi</namePart>
        <namePart type="family">Tsujii</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, California</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-434-7</identifier>
    </relatedItem>
    <!-- Plain-text abstract: the plus-minus sign is the literal character, not LaTeX markup. -->
    <abstract>Seizure freedom is a key clinical outcome for people with epilepsy (PWE) yet it is primarily recorded in free-text notes and letters in the United Kingdom, making it difficult to aggregate and track at scale. This paper introduces a generative LLM-based pipeline boosted by synthetic data to identify a PWE’s seizure freedom status in clinicians’ records. We fine-tuned seven different LLMs with between 4-14 billion parameters using LoRA to compare models trained on synthetic records against those trained on expert annotated records. The best performing configuration, based on Qwen-2.5-14B, was trained entirely on synthetic records and used chain-of-thought (CoT) reasoning (both generated by GPT-5). This achieved an F1 score of 0.90±0.02 on double-annotated test data and outperformed the equivalent model trained on authentic clinician records, which achieved 0.87±0.04. The synthetically trained models also have the benefit of outputting their CoT reasoning process for greater decision-making transparency and can also make use of the unused supervised training data for significantly increased test examples. This work has implications for monitoring a key treatment outcome for PWE automatically and at scale.</abstract>
    <identifier type="citekey">barlow-etal-2026-using</identifier>
    <location>
      <url>https://aclanthology.org/2026.bionlp-1.3/</url>
    </location>
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>20</start>
        <end>30</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Using Synthetic Records to Improve Automated Identification of Seizure Freedom in Clinical Text about People with Epilepsy
%A Barlow, Stephen
%A Gan, Yujian
%A Davies, Joe
%A Winston, Joel
%A Teo, James
%A Richardson, Mark
%A Holgate, Ben
%Y Demner-Fushman, Dina
%Y Ananiadou, Sophia
%Y Roberts, Kirk
%Y Tsujii, Junichi
%S BioNLP 2026
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California
%@ 979-8-89176-434-7
%F barlow-etal-2026-using
%X Seizure freedom is a key clinical outcome for people with epilepsy (PWE) yet it is primarily recorded in free-text notes and letters in the United Kingdom, making it difficult to aggregate and track at scale. This paper introduces a generative LLM-based pipeline boosted by synthetic data to identify a PWE’s seizure freedom status in clinicians’ records. We fine-tuned seven different LLMs with between 4-14 billion parameters using LoRA to compare models trained on synthetic records against those trained on expert annotated records. The best performing configuration, based on Qwen-2.5-14B, was trained entirely on synthetic records and used chain-of-thought (CoT) reasoning (both generated by GPT-5). This achieved an F1 score of 0.90±0.02 on double-annotated test data and outperformed the equivalent model trained on authentic clinician records, which achieved 0.87±0.04. The synthetically trained models also have the benefit of outputting their CoT reasoning process for greater decision-making transparency and can also make use of the unused supervised training data for significantly increased test examples. This work has implications for monitoring a key treatment outcome for PWE automatically and at scale.
%U https://aclanthology.org/2026.bionlp-1.3/
%P 20-30
Markdown (Informal)
[Using Synthetic Records to Improve Automated Identification of Seizure Freedom in Clinical Text about People with Epilepsy](https://aclanthology.org/2026.bionlp-1.3/) (Barlow et al., BioNLP 2026)
ACL