% Conference paper from the ACL 2026 proceedings (url points to the ACL Anthology).
% Case-sensitive words in title/booktitle are protected with whole-word braces.
@inproceedings{song-ahmed-2026-tonal,
  title     = {Tonal Salience in Cognitive Decline: In-Context {MCI} Detection with Multimodal {LLMs}},
  author    = {Song, Christopher and
               Ahmed, Abdullah P. Rashed},
  editor    = {T.Y.S.S., Santosh and
               Rodriguez, Juan Diego and
               de Gibert, Ona},
  booktitle = {Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} ({ACL} 2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.acl-srw.81/},
  pages     = {913--922},
  isbn      = {979-8-89176-393-7},
  abstract  = {Speech-based screening for mild cognitive impairment offers a highly accessible way to detect early cognitive decline. While most existing work focuses on English, cross-linguistic research is emerging to examine how cognitive decline manifests across languages. Studies on the Interspeech 2024 TAUKADIAL dataset, comprising English and Chinese speech recordings, consistently report higher classification performance on Chinese, yet the cause of this cross-lingual discrepancy remains unexplored. We examine this gap using Gemini 2.5 Pro, a multimodal large language model, using zero-shot and in-context-learning (ICL) paradigms. We hypothesize that this disparity is rooted in language typology: in tonal languages like Chinese, pitch encodes lexical meaning in every syllable (tone), whereas in non-tonal languages like English, pitch carries no lexical function. To test this, we pitch-flattened audio from TAUKADIAL and compared how classification performance changed across both languages. We found that Chinese classification degraded significantly under both zero-shot and ICL conditions (-4.78 and -5.92 UAR, respectively), while English performance increased (+0.11 and +2.98 UAR), implicating tonal pitch as the cross-lingual advantage. These findings suggest language typology should inform the design of audio-based cognitive screening tools, with raw audio preferred for tonal languages and text for non-tonal languages, a distinction critical for developing equitable cross-linguistic screening.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for citekey song-ahmed-2026-tonal: article-level metadata first,
     then the host proceedings (editors, publisher, place, ISBN), then abstract,
     URL and page extent (913 to 922). -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="song-ahmed-2026-tonal">
    <titleInfo>
      <title>Tonal Salience in Cognitive Decline: In-Context MCI Detection with Multimodal LLMs</title>
    </titleInfo>
    <!-- Authors, in publication order -->
    <name type="personal">
      <namePart type="given">Christopher</namePart>
      <namePart type="family">Song</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Abdullah</namePart>
      <namePart type="given">P</namePart>
      <namePart type="given">Rashed</namePart>
      <namePart type="family">Ahmed</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume this paper appears in -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (ACL 2026)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Santosh</namePart>
        <namePart type="family">T.Y.S.S.</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Juan</namePart>
        <namePart type="given">Diego</namePart>
        <namePart type="family">Rodriguez</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ona</namePart>
        <namePart type="family">de Gibert</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, California, United States</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-393-7</identifier>
    </relatedItem>
    <abstract>Speech-based screening for mild cognitive impairment offers a highly accessible way to detect early cognitive decline. While most existing work focuses on English, cross-linguistic research is emerging to examine how cognitive decline manifests across languages. Studies on the Interspeech 2024 TAUKADIAL dataset, comprising English and Chinese speech recordings, consistently report higher classification performance on Chinese, yet the cause of this cross-lingual discrepancy remains unexplored. We examine this gap using Gemini 2.5 Pro, a multimodal large language model, using zero-shot and in-context-learning (ICL) paradigms. We hypothesize that this disparity is rooted in language typology: in tonal languages like Chinese, pitch encodes lexical meaning in every syllable (tone), whereas in non-tonal languages like English, pitch carries no lexical function. To test this, we pitch-flattened audio from TAUKADIAL and compared how classification performance changed across both languages. We found that Chinese classification degraded significantly under both zero-shot and ICL conditions (-4.78 and -5.92 UAR, respectively), while English performance increased (+0.11 and +2.98 UAR), implicating tonal pitch as the cross-lingual advantage. These findings suggest language typology should inform the design of audio-based cognitive screening tools, with raw audio preferred for tonal languages and text for non-tonal languages, a distinction critical for developing equitable cross-linguistic screening.</abstract>
    <identifier type="citekey">song-ahmed-2026-tonal</identifier>
    <location>
      <url>https://aclanthology.org/2026.acl-srw.81/</url>
    </location>
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>913</start>
        <end>922</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Tonal Salience in Cognitive Decline: In-Context MCI Detection with Multimodal LLMs
%A Song, Christopher
%A Ahmed, Abdullah P. Rashed
%Y T.Y.S.S., Santosh
%Y Rodriguez, Juan Diego
%Y de Gibert, Ona
%S Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (ACL 2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-393-7
%F song-ahmed-2026-tonal
%X Speech-based screening for mild cognitive impairment offers a highly accessible way to detect early cognitive decline. While most existing work focuses on English, cross-linguistic research is emerging to examine how cognitive decline manifests across languages. Studies on the Interspeech 2024 TAUKADIAL dataset, comprising English and Chinese speech recordings, consistently report higher classification performance on Chinese, yet the cause of this cross-lingual discrepancy remains unexplored. We examine this gap using Gemini 2.5 Pro, a multimodal large language model, using zero-shot and in-context-learning (ICL) paradigms. We hypothesize that this disparity is rooted in language typology: in tonal languages like Chinese, pitch encodes lexical meaning in every syllable (tone), whereas in non-tonal languages like English, pitch carries no lexical function. To test this, we pitch-flattened audio from TAUKADIAL and compared how classification performance changed across both languages. We found that Chinese classification degraded significantly under both zero-shot and ICL conditions (-4.78 and -5.92 UAR, respectively), while English performance increased (+0.11 and +2.98 UAR), implicating tonal pitch as the cross-lingual advantage. These findings suggest language typology should inform the design of audio-based cognitive screening tools, with raw audio preferred for tonal languages and text for non-tonal languages, a distinction critical for developing equitable cross-linguistic screening.
%U https://aclanthology.org/2026.acl-srw.81/
%P 913-922
Markdown (Informal)
[Tonal Salience in Cognitive Decline: In-Context MCI Detection with Multimodal LLMs](https://aclanthology.org/2026.acl-srw.81/) (Song & Ahmed, ACL 2026)
ACL