@inproceedings{min-etal-2025-speech,
title = "Speech-Integrated Modeling for Behavioral Coding in Counseling",
author = "Min, Do June and
P{\'e}rez-Rosas, Ver{\'o}nica and
Resnicow, Kenneth and
Mihalcea, Rada",
editor = "B{\'e}chet, Fr{\'e}d{\'e}ric and
Lef{\`e}vre, Fabrice and
Asher, Nicholas and
Kim, Seokhwan and
Merlin, Teva",
booktitle = "Proceedings of the 26th Annual Meeting of the Special Interest Group on Discourse and Dialogue",
month = aug,
year = "2025",
address = "Avignon, France",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.sigdial-1.10/",
pages = "152--158",
abstract = "Computational models of psychotherapy often ignore vocal cues by relying solely on text. To address this, we propose MISQ, a framework that integrates speech features directly into language models using a speech encoder and lightweight adapter. MISQ improves behavioral analysis in counseling conversations, achieving {\textasciitilde}5{\%} relative gains over text-only or indirect speech methods{---}underscoring the value of vocal signals like tone and prosody."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="min-etal-2025-speech">
<titleInfo>
<title>Speech-Integrated Modeling for Behavioral Coding in Counseling</title>
</titleInfo>
<name type="personal">
<namePart type="given">Do</namePart>
<namePart type="given">June</namePart>
<namePart type="family">Min</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Verónica</namePart>
<namePart type="family">Pérez-Rosas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kenneth</namePart>
<namePart type="family">Resnicow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rada</namePart>
<namePart type="family">Mihalcea</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 26th Annual Meeting of the Special Interest Group on Discourse and Dialogue</title>
</titleInfo>
<name type="personal">
<namePart type="given">Frédéric</namePart>
<namePart type="family">Béchet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fabrice</namePart>
<namePart type="family">Lefèvre</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nicholas</namePart>
<namePart type="family">Asher</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Seokhwan</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Teva</namePart>
<namePart type="family">Merlin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Avignon, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Computational models of psychotherapy often ignore vocal cues by relying solely on text. To address this, we propose MISQ, a framework that integrates speech features directly into language models using a speech encoder and lightweight adapter. MISQ improves behavioral analysis in counseling conversations, achieving ~5% relative gains over text-only or indirect speech methods—underscoring the value of vocal signals like tone and prosody.</abstract>
<identifier type="citekey">min-etal-2025-speech</identifier>
<location>
<url>https://aclanthology.org/2025.sigdial-1.10/</url>
</location>
<part>
<date>2025-08</date>
<extent unit="page">
<start>152</start>
<end>158</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Speech-Integrated Modeling for Behavioral Coding in Counseling
%A Min, Do June
%A Pérez-Rosas, Verónica
%A Resnicow, Kenneth
%A Mihalcea, Rada
%Y Béchet, Frédéric
%Y Lefèvre, Fabrice
%Y Asher, Nicholas
%Y Kim, Seokhwan
%Y Merlin, Teva
%S Proceedings of the 26th Annual Meeting of the Special Interest Group on Discourse and Dialogue
%D 2025
%8 August
%I Association for Computational Linguistics
%C Avignon, France
%F min-etal-2025-speech
%X Computational models of psychotherapy often ignore vocal cues by relying solely on text. To address this, we propose MISQ, a framework that integrates speech features directly into language models using a speech encoder and lightweight adapter. MISQ improves behavioral analysis in counseling conversations, achieving ~5% relative gains over text-only or indirect speech methods—underscoring the value of vocal signals like tone and prosody.
%U https://aclanthology.org/2025.sigdial-1.10/
%P 152-158
Markdown (Informal)
[Speech-Integrated Modeling for Behavioral Coding in Counseling](https://aclanthology.org/2025.sigdial-1.10/) (Min et al., SIGDIAL 2025)
ACL
- Do June Min, Verónica Pérez-Rosas, Kenneth Resnicow, and Rada Mihalcea. 2025. Speech-Integrated Modeling for Behavioral Coding in Counseling. In Proceedings of the 26th Annual Meeting of the Special Interest Group on Discourse and Dialogue, pages 152–158, Avignon, France. Association for Computational Linguistics.