% BibTeX record for ACL Anthology paper 2026.acl-industry.33 (pages 480--493).
% Text outside an entry is ignored by BibTeX, so these lines act as comments.
@inproceedings{balaji-etal-2026-learning,
    title = "Learning from Textual Radiology Reports: A Benchmark Dataset for Coronary {CT} Angiography",
    author = "Balaji, Sudharshan and
      Liu, Zhiyu and
      Jiang, Zhengyuan and
      Lei, Shuo and
      Chen, Yimin and
      Xiao, Yang and
      Almeida, Shone O. and
      Karivelil, Mathew Joseph and
      Malanga, Christopher and
      Wang, Ning",
    editor = "Li, Yunyao and
      Rehm, Georg and
      Tu, Mei",
    booktitle = "Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} ({ACL} 2026)",
    month = jul,
    year = "2026",
    address = "San Diego, California, USA",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2026.acl-industry.33/",
    pages = "480--493",
    isbn = "979-8-89176-394-4",
    abstract = "While coronary imaging is widely used for anatomical assessment, CCTA reports play a distinct last-mile role in clinical care. Rather than serving as an intermediate signal, CCTA provides an assessment of coronary disease severity (known as the CAD-RADS score) to guide patient management. However, real-world clinical text exhibits substantial heterogeneity in terminology and structure, leading to inconsistent interpretation by automated systems, even for clinically similar cases. Recent work leverages a direct application of LLMs for automated CAD-RADS scoring, but is limited by small, non-public, and homogeneous clinical data. We introduce CCTA-RADS, the largest publicly available dataset of 940 real-world CCTA reports from a major cardiovascular center, each annotated with CAD-RADS scores. Our analysis reveals that direct approaches, including state-of-the-art LLMs (GPT-4o, GPT-o3) and fine-tuned BERT models underperform on diverse real-world clinical data. To address these limitations, we propose a two-stage pipeline that decouples structuring from classification: an LLM-based parser normalizes heterogeneous reports into structured format, followed by fine-tuned BERT classification. This approach substantially improves the F1-score by 6{\%}-13{\%} compared with direct methods. We deploy our system as an interactive web interface that allows clinicians to upload CCTA reports for automated CAD-RADS assessment with SHAP and LIME explainability visualizations."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="balaji-etal-2026-learning">
<titleInfo>
<title>Learning from Textual Radiology Reports: A Benchmark Dataset for Coronary CT Angiography</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sudharshan</namePart>
<namePart type="family">Balaji</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhiyu</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhengyuan</namePart>
<namePart type="family">Jiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shuo</namePart>
<namePart type="family">Lei</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yimin</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="family">Xiao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shone</namePart>
<namePart type="given">O</namePart>
<namePart type="family">Almeida</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mathew</namePart>
<namePart type="given">Joseph</namePart>
<namePart type="family">Karivelil</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christopher</namePart>
<namePart type="family">Malanga</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ning</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (ACL 2026)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yunyao</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Georg</namePart>
<namePart type="family">Rehm</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mei</namePart>
<namePart type="family">Tu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-394-4</identifier>
</relatedItem>
<abstract>While coronary imaging is widely used for anatomical assessment, CCTA reports play a distinct last-mile role in clinical care. Rather than serving as an intermediate signal, CCTA provides an assessment of coronary disease severity (known as the CAD-RADS score) to guide patient management. However, real-world clinical text exhibits substantial heterogeneity in terminology and structure, leading to inconsistent interpretation by automated systems, even for clinically similar cases. Recent work leverages a direct application of LLMs for automated CAD-RADS scoring, but is limited by small, non-public, and homogeneous clinical data. We introduce CCTA-RADS, the largest publicly available dataset of 940 real-world CCTA reports from a major cardiovascular center, each annotated with CAD-RADS scores. Our analysis reveals that direct approaches, including state-of-the-art LLMs (GPT-4o, GPT-o3) and fine-tuned BERT models underperform on diverse real-world clinical data. To address these limitations, we propose a two-stage pipeline that decouples structuring from classification: an LLM-based parser normalizes heterogeneous reports into structured format, followed by fine-tuned BERT classification. This approach substantially improves the F1-score by 6%-13% compared with direct methods. We deploy our system as an interactive web interface that allows clinicians to upload CCTA reports for automated CAD-RADS assessment with SHAP and LIME explainability visualizations.</abstract>
<identifier type="citekey">balaji-etal-2026-learning</identifier>
<location>
<url>https://aclanthology.org/2026.acl-industry.33/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>480</start>
<end>493</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Learning from Textual Radiology Reports: A Benchmark Dataset for Coronary CT Angiography
%A Balaji, Sudharshan
%A Liu, Zhiyu
%A Jiang, Zhengyuan
%A Lei, Shuo
%A Chen, Yimin
%A Xiao, Yang
%A Almeida, Shone O.
%A Karivelil, Mathew Joseph
%A Malanga, Christopher
%A Wang, Ning
%Y Li, Yunyao
%Y Rehm, Georg
%Y Tu, Mei
%S Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (ACL 2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, USA
%@ 979-8-89176-394-4
%F balaji-etal-2026-learning
%X While coronary imaging is widely used for anatomical assessment, CCTA reports play a distinct last-mile role in clinical care. Rather than serving as an intermediate signal, CCTA provides an assessment of coronary disease severity (known as the CAD-RADS score) to guide patient management. However, real-world clinical text exhibits substantial heterogeneity in terminology and structure, leading to inconsistent interpretation by automated systems, even for clinically similar cases. Recent work leverages a direct application of LLMs for automated CAD-RADS scoring, but is limited by small, non-public, and homogeneous clinical data. We introduce CCTA-RADS, the largest publicly available dataset of 940 real-world CCTA reports from a major cardiovascular center, each annotated with CAD-RADS scores. Our analysis reveals that direct approaches, including state-of-the-art LLMs (GPT-4o, GPT-o3) and fine-tuned BERT models underperform on diverse real-world clinical data. To address these limitations, we propose a two-stage pipeline that decouples structuring from classification: an LLM-based parser normalizes heterogeneous reports into structured format, followed by fine-tuned BERT classification. This approach substantially improves the F1-score by 6%-13% compared with direct methods. We deploy our system as an interactive web interface that allows clinicians to upload CCTA reports for automated CAD-RADS assessment with SHAP and LIME explainability visualizations.
%U https://aclanthology.org/2026.acl-industry.33/
%P 480-493
Markdown (Informal)
[Learning from Textual Radiology Reports: A Benchmark Dataset for Coronary CT Angiography](https://aclanthology.org/2026.acl-industry.33/) (Balaji et al., ACL 2026)
ACL
- Sudharshan Balaji, Zhiyu Liu, Zhengyuan Jiang, Shuo Lei, Yimin Chen, Yang Xiao, Shone O. Almeida, Mathew Joseph Karivelil, Christopher Malanga, and Ning Wang. 2026. Learning from Textual Radiology Reports: A Benchmark Dataset for Coronary CT Angiography. In Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (ACL 2026), pages 480–493, San Diego, California, USA. Association for Computational Linguistics.