@comment{BibTeX record. The same reference is repeated below as MODS XML and as a tagged record.}
@inproceedings{roll-etal-2026-text,
    title     = {The Text Aphasia Battery ({TAB}): A Clinically-Grounded Benchmark for Aphasia-Like Deficits in Language Models},
    author    = {Roll, Nathan and
                 Kries, Jill and
                 Jin, Flora and
                 Wang, Catherine and
                 Finley, Ann Marie and
                 Sumner, Meghan and
                 Shain, Cory and
                 Gwilliams, Laura},
    editor    = {Zirikly, Aya and
                 Bar, Kfir and
                 MacAvaney, Sean and
                 Ireland, Molly and
                 Ophir, Yaakov and
                 Atzil-Slonim, Dana and
                 Varadarajan, Vasudha and
                 Bedrick, Steven and
                 Desmet, Bart},
    booktitle = {Proceedings of the 10th Workshop on Computational Linguistics and Clinical Psychology ({CLP}sych 2026)},
    month     = jul,
    year      = {2026},
    address   = {San Diego, California, USA},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2026.clpsych-1.27/},
    pages     = {340--354},
    isbn      = {979-8-89176-421-7},
    abstract  = {Large language models (LLMs) have emerged as a candidate `model organism' for human language, offering an unprecedented opportunity to study the computational basis of linguistic disorders like aphasia. However, traditional clinical assessments are ill-suited for LLMs, as they presuppose human-like pragmatic pressures and probe cognitive processes not inherent to artificial architectures. We introduce the Text Aphasia Battery (TAB), a text-only benchmark adapted from the Quick Aphasia Battery (QAB) to assess aphasic-like deficits in LLMs. The TAB comprises four subtests: Connected Text, Word Comprehension, Sentence Comprehension, and Repetition. This paper details the TAB{'}s design, subtests, and scoring criteria. To facilitate large-scale use, we validate an automated evaluation protocol using Gemini 2.5 Flash, which achieves reliability comparable to expert human raters (prevalence-weighted Cohen{'}s k=0.255 for model{--}consensus agreement vs. 0.286 for human{--}human agreement). We release TAB as a clinically-grounded, scalable framework for analyzing language deficits in artificial systems.}
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record; its ID matches the citekey identifier further down. -->
  <mods ID="roll-etal-2026-text">
    <titleInfo>
      <title>The Text Aphasia Battery (TAB): A Clinically-Grounded Benchmark for Aphasia-Like Deficits in Language Models</title>
    </titleInfo>
    <!-- Authors -->
    <name type="personal">
      <namePart type="given">Nathan</namePart>
      <namePart type="family">Roll</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jill</namePart>
      <namePart type="family">Kries</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Flora</namePart>
      <namePart type="family">Jin</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Catherine</namePart>
      <namePart type="family">Wang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ann</namePart>
      <namePart type="given">Marie</namePart>
      <namePart type="family">Finley</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Meghan</namePart>
      <namePart type="family">Sumner</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Cory</namePart>
      <namePart type="family">Shain</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Laura</namePart>
      <namePart type="family">Gwilliams</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher, place and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 10th Workshop on Computational Linguistics and Clinical Psychology (CLPsych 2026)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Aya</namePart>
        <namePart type="family">Zirikly</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Kfir</namePart>
        <namePart type="family">Bar</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sean</namePart>
        <namePart type="family">MacAvaney</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Molly</namePart>
        <namePart type="family">Ireland</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yaakov</namePart>
        <namePart type="family">Ophir</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Dana</namePart>
        <namePart type="family">Atzil-Slonim</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Vasudha</namePart>
        <namePart type="family">Varadarajan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Steven</namePart>
        <namePart type="family">Bedrick</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Bart</namePart>
        <namePart type="family">Desmet</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, California, USA</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-421-7</identifier>
    </relatedItem>
    <abstract>Large language models (LLMs) have emerged as a candidate ‘model organism’ for human language, offering an unprecedented opportunity to study the computational basis of linguistic disorders like aphasia. However, traditional clinical assessments are ill-suited for LLMs, as they presuppose human-like pragmatic pressures and probe cognitive processes not inherent to artificial architectures. We introduce the Text Aphasia Battery (TAB), a text-only benchmark adapted from the Quick Aphasia Battery (QAB) to assess aphasic-like deficits in LLMs. The TAB comprises four subtests: Connected Text, Word Comprehension, Sentence Comprehension, and Repetition. This paper details the TAB’s design, subtests, and scoring criteria. To facilitate large-scale use, we validate an automated evaluation protocol using Gemini 2.5 Flash, which achieves reliability comparable to expert human raters (prevalence-weighted Cohen’s k=0.255 for model–consensus agreement vs. 0.286 for human–human agreement). We release TAB as a clinically-grounded, scalable framework for analyzing language deficits in artificial systems.</abstract>
    <identifier type="citekey">roll-etal-2026-text</identifier>
    <location>
      <url>https://aclanthology.org/2026.clpsych-1.27/</url>
    </location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>340</start>
        <end>354</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T The Text Aphasia Battery (TAB): A Clinically-Grounded Benchmark for Aphasia-Like Deficits in Language Models
%A Roll, Nathan
%A Kries, Jill
%A Jin, Flora
%A Wang, Catherine
%A Finley, Ann Marie
%A Sumner, Meghan
%A Shain, Cory
%A Gwilliams, Laura
%Y Zirikly, Aya
%Y Bar, Kfir
%Y MacAvaney, Sean
%Y Ireland, Molly
%Y Ophir, Yaakov
%Y Atzil-Slonim, Dana
%Y Varadarajan, Vasudha
%Y Bedrick, Steven
%Y Desmet, Bart
%S Proceedings of the 10th Workshop on Computational Linguistics and Clinical Psychology (CLPsych 2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, USA
%@ 979-8-89176-421-7
%F roll-etal-2026-text
%X Large language models (LLMs) have emerged as a candidate ‘model organism’ for human language, offering an unprecedented opportunity to study the computational basis of linguistic disorders like aphasia. However, traditional clinical assessments are ill-suited for LLMs, as they presuppose human-like pragmatic pressures and probe cognitive processes not inherent to artificial architectures. We introduce the Text Aphasia Battery (TAB), a text-only benchmark adapted from the Quick Aphasia Battery (QAB) to assess aphasic-like deficits in LLMs. The TAB comprises four subtests: Connected Text, Word Comprehension, Sentence Comprehension, and Repetition. This paper details the TAB’s design, subtests, and scoring criteria. To facilitate large-scale use, we validate an automated evaluation protocol using Gemini 2.5 Flash, which achieves reliability comparable to expert human raters (prevalence-weighted Cohen’s k=0.255 for model–consensus agreement vs. 0.286 for human–human agreement). We release TAB as a clinically-grounded, scalable framework for analyzing language deficits in artificial systems.
%U https://aclanthology.org/2026.clpsych-1.27/
%P 340-354
Markdown (Informal)
[The Text Aphasia Battery (TAB): A Clinically-Grounded Benchmark for Aphasia-Like Deficits in Language Models](https://aclanthology.org/2026.clpsych-1.27/) (Roll et al., CLPsych 2026)
ACL
- Nathan Roll, Jill Kries, Flora Jin, Catherine Wang, Ann Marie Finley, Meghan Sumner, Cory Shain, and Laura Gwilliams. 2026. The Text Aphasia Battery (TAB): A Clinically-Grounded Benchmark for Aphasia-Like Deficits in Language Models. In Proceedings of the 10th Workshop on Computational Linguistics and Clinical Psychology (CLPsych 2026), pages 340–354, San Diego, California, USA. Association for Computational Linguistics.