% Conference paper in the ACL 2025 Industry Track proceedings
% (ACL Anthology ID 2025.acl-industry.66). BibTeX ignores text outside entries.
@inproceedings{agarwal-etal-2025-framework,
    title     = {A Framework for Flexible Extraction of Clinical Event Contextual Properties from Electronic Health Records},
    author    = {Agarwal, Shubham and
                 Searle, Thomas and
                 Ratas, Mart and
                 Shek, Anthony and
                 Teo, James and
                 Dobson, Richard},
    editor    = {Rehm, Georg and
                 Li, Yunyao},
    booktitle = {Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 6: Industry Track)},
    month     = jul,
    year      = {2025},
    address   = {Vienna, Austria},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.acl-industry.66/},
    doi       = {10.18653/v1/2025.acl-industry.66},
    pages     = {946--959},
    isbn      = {979-8-89176-288-6},
    abstract  = {Electronic Health Records contain vast amounts of valuable clinical data, much of which is stored as unstructured text. Extracting meaningful clinical events (e.g., disorders, symptoms, findings, medications, and procedures etc.) in context within real-world healthcare settings is crucial for enabling downstream applications such as disease prediction, clinical coding for billing and decision support. After Named Entity Recognition and Linking (NER+L) methodology, the identified concepts need to be further classified (i.e. contextualized) for distinct properties such as their relevance to the patient, their temporal and negated status for meaningful clinical use. We present a solution that, using an existing NER+L approach - MedCAT, classifies and contextualizes medical entities at scale. We evaluate the NLP approaches through 14 distinct real-world clinical text classification projects, testing our suite of models tailored to different clinical NLP needs. For tasks requiring high minority class recall, BERT proves the most effective when coupled with class imbalance mitigation techniques, outperforming Bi-LSTM with up to 28{\%}. For majority class focused tasks, Bi-LSTM offers a lightweight alternative with, on average, 32{\%} faster training time and lower computational cost. Importantly, these tools are integrated into an openly available library, enabling users to select the best model for their specific downstream applications.}
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="agarwal-etal-2025-framework">
<titleInfo>
<title>A Framework for Flexible Extraction of Clinical Event Contextual Properties from Electronic Health Records</title>
</titleInfo>
<name type="personal">
<namePart type="given">Shubham</namePart>
<namePart type="family">Agarwal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thomas</namePart>
<namePart type="family">Searle</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mart</namePart>
<namePart type="family">Ratas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anthony</namePart>
<namePart type="family">Shek</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">James</namePart>
<namePart type="family">Teo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Richard</namePart>
<namePart type="family">Dobson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 6: Industry Track)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Georg</namePart>
<namePart type="family">Rehm</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yunyao</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-288-6</identifier>
</relatedItem>
<abstract>Electronic Health Records contain vast amounts of valuable clinical data, much of which is stored as unstructured text. Extracting meaningful clinical events (e.g., disorders, symptoms, findings, medications, and procedures etc.) in context within real-world healthcare settings is crucial for enabling downstream applications such as disease prediction, clinical coding for billing and decision support. After Named Entity Recognition and Linking (NER+L) methodology, the identified concepts need to be further classified (i.e. contextualized) for distinct properties such as their relevance to the patient, their temporal and negated status for meaningful clinical use. We present a solution that, using an existing NER+L approach - MedCAT, classifies and contextualizes medical entities at scale. We evaluate the NLP approaches through 14 distinct real-world clinical text classification projects, testing our suite of models tailored to different clinical NLP needs. For tasks requiring high minority class recall, BERT proves the most effective when coupled with class imbalance mitigation techniques, outperforming Bi-LSTM with up to 28%. For majority class focused tasks, Bi-LSTM offers a lightweight alternative with, on average, 32% faster training time and lower computational cost. Importantly, these tools are integrated into an openly available library, enabling users to select the best model for their specific downstream applications.</abstract>
<identifier type="citekey">agarwal-etal-2025-framework</identifier>
<identifier type="doi">10.18653/v1/2025.acl-industry.66</identifier>
<location>
<url>https://aclanthology.org/2025.acl-industry.66/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>946</start>
<end>959</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Framework for Flexible Extraction of Clinical Event Contextual Properties from Electronic Health Records
%A Agarwal, Shubham
%A Searle, Thomas
%A Ratas, Mart
%A Shek, Anthony
%A Teo, James
%A Dobson, Richard
%Y Rehm, Georg
%Y Li, Yunyao
%S Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 6: Industry Track)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-288-6
%F agarwal-etal-2025-framework
%X Electronic Health Records contain vast amounts of valuable clinical data, much of which is stored as unstructured text. Extracting meaningful clinical events (e.g., disorders, symptoms, findings, medications, and procedures etc.) in context within real-world healthcare settings is crucial for enabling downstream applications such as disease prediction, clinical coding for billing and decision support. After Named Entity Recognition and Linking (NER+L) methodology, the identified concepts need to be further classified (i.e. contextualized) for distinct properties such as their relevance to the patient, their temporal and negated status for meaningful clinical use. We present a solution that, using an existing NER+L approach - MedCAT, classifies and contextualizes medical entities at scale. We evaluate the NLP approaches through 14 distinct real-world clinical text classification projects, testing our suite of models tailored to different clinical NLP needs. For tasks requiring high minority class recall, BERT proves the most effective when coupled with class imbalance mitigation techniques, outperforming Bi-LSTM with up to 28%. For majority class focused tasks, Bi-LSTM offers a lightweight alternative with, on average, 32% faster training time and lower computational cost. Importantly, these tools are integrated into an openly available library, enabling users to select the best model for their specific downstream applications.
%R 10.18653/v1/2025.acl-industry.66
%U https://aclanthology.org/2025.acl-industry.66/
%U https://doi.org/10.18653/v1/2025.acl-industry.66
%P 946-959
Markdown (Informal)
[A Framework for Flexible Extraction of Clinical Event Contextual Properties from Electronic Health Records](https://aclanthology.org/2025.acl-industry.66/) (Agarwal et al., ACL 2025)
ACL