@inproceedings{anastasia-etal-2026-multimodal,
title = "A Multimodal Framework for Aphasia Severity Classification in {R}ussian",
author = "Anastasia, Kolmogorova and
Yavshitz, Ekaterina and
Margolina, Anastasia and
Sugian, Anna",
editor = {Danilova, Vera and
Kurfal{\i}, Murathan and
S{\"o}derfeldt, Ylva and
Reed, Julia and
Burchell, Andrew},
booktitle = "Proceedings of the 1st Workshop on Linguistic Analysis for Health ({H}ea{L}ing 2026)",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.healing-1.22/",
pages = "257--265",
isbn = "979-8-89176-367-8",
abstract = "Automatic classification of aphasia severity presents persistent challenges, particularly for languages with limited clinical speech resources such as Russian. This paper explores a multimodal approach to severity estimation that combines acoustic and semantic representations of pathological speech. Acoustic features are extracted using pretrained Wav2Vec 2.0 models, while semantic information is obtained from the encoder of the Whisper model. The two representations are integrated via early feature fusion and evaluated using gradient boosting classifiers in a speaker-independent cross-validation setting. Experiments are conducted on a newly collected dataset of Russian speech recordings from patients with aphasia and neurotypical speakers (RuAphasiaBank). The results suggest that the combined use of acoustic and semantic embeddings can provide more stable severity estimates than unimodal baselines. This study contributes empirical evidence on the applicability of multimodal representation learning for aphasia severity classification under data-scarce conditions."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="anastasia-etal-2026-multimodal">
<titleInfo>
<title>A Multimodal Framework for Aphasia Severity Classification in Russian</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anastasia</namePart>
<namePart type="family">Kolmogorova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Yavshitz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anastasia</namePart>
<namePart type="family">Margolina</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Sugian</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st Workshop on Linguistic Analysis for Health (HeaLing 2026)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Vera</namePart>
<namePart type="family">Danilova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Murathan</namePart>
<namePart type="family">Kurfalı</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ylva</namePart>
<namePart type="family">Söderfeldt</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Julia</namePart>
<namePart type="family">Reed</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andrew</namePart>
<namePart type="family">Burchell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-367-8</identifier>
</relatedItem>
<abstract>Automatic classification of aphasia severity presents persistent challenges, particularly for languages with limited clinical speech resources such as Russian. This paper explores a multimodal approach to severity estimation that combines acoustic and semantic representations of pathological speech. Acoustic features are extracted using pretrained Wav2Vec 2.0 models, while semantic information is obtained from the encoder of the Whisper model. The two representations are integrated via early feature fusion and evaluated using gradient boosting classifiers in a speaker-independent cross-validation setting. Experiments are conducted on a newly collected dataset of Russian speech recordings from patients with aphasia and neurotypical speakers (RuAphasiaBank). The results suggest that the combined use of acoustic and semantic embeddings can provide more stable severity estimates than unimodal baselines. This study contributes empirical evidence on the applicability of multimodal representation learning for aphasia severity classification under data-scarce conditions.</abstract>
<identifier type="citekey">anastasia-etal-2026-multimodal</identifier>
<location>
<url>https://aclanthology.org/2026.healing-1.22/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>257</start>
<end>265</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Multimodal Framework for Aphasia Severity Classification in Russian
%A Kolmogorova, Anastasia
%A Yavshitz, Ekaterina
%A Margolina, Anastasia
%A Sugian, Anna
%Y Danilova, Vera
%Y Kurfalı, Murathan
%Y Söderfeldt, Ylva
%Y Reed, Julia
%Y Burchell, Andrew
%S Proceedings of the 1st Workshop on Linguistic Analysis for Health (HeaLing 2026)
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-367-8
%F anastasia-etal-2026-multimodal
%X Automatic classification of aphasia severity presents persistent challenges, particularly for languages with limited clinical speech resources such as Russian. This paper explores a multimodal approach to severity estimation that combines acoustic and semantic representations of pathological speech. Acoustic features are extracted using pretrained Wav2Vec 2.0 models, while semantic information is obtained from the encoder of the Whisper model. The two representations are integrated via early feature fusion and evaluated using gradient boosting classifiers in a speaker-independent cross-validation setting. Experiments are conducted on a newly collected dataset of Russian speech recordings from patients with aphasia and neurotypical speakers (RuAphasiaBank). The results suggest that the combined use of acoustic and semantic embeddings can provide more stable severity estimates than unimodal baselines. This study contributes empirical evidence on the applicability of multimodal representation learning for aphasia severity classification under data-scarce conditions.
%U https://aclanthology.org/2026.healing-1.22/
%P 257-265
Markdown (Informal)
[A Multimodal Framework for Aphasia Severity Classification in Russian](https://aclanthology.org/2026.healing-1.22/) (Kolmogorova et al., HeaLing 2026)
ACL