@inproceedings{ajichi-etal-2026-adaptive,
title = "Adaptive Multimodal Sentiment Analysis with Stream-Based Active Learning for Spoken Dialogue Systems",
author = "Ajichi, Atsuto and
Hayashi, Takato and
Komatani, Kazunori and
Okada, Shogo",
editor = "Riccardi, Giuseppe and
Mousavi, Seyed Mahed and
Torres, Maria Ines and
Yoshino, Koichiro and
Callejas, Zoraida and
Chowdhury, Shammur Absar and
Chen, Yun-Nung and
Bechet, Frederic and
Gustafson, Joakim and
Damnati, G{\'e}raldine and
Papangelis, Alex and
D{'}Haro, Luis Fernando and
Mendon{\c{c}}a, John and
Bernardi, Raffaella and
Hakkani-Tur, Dilek and
Di Fabbrizio, Giuseppe {``}Pino{''} and
Kawahara, Tatsuya and
Alam, Firoj and
Tur, Gokhan and
Johnston, Michael",
booktitle = "Proceedings of the 16th International Workshop on Spoken Dialogue System Technology",
month = feb,
year = "2026",
address = "Trento, Italy",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.iwsds-1.33/",
pages = "326--337",
abstract = "In empathic dialogue systems, it is crucial to continuously monitor and adapt to the user{'}s emotional state. To capture user-specific mappings between multimodal behaviors and emotional states, directly asking users about their emotions during dialogue is the most straightforward and effective approach. However, frequent questioning can cause inconvenience to users and diminish the user experience, so the number of queries should be minimized. In this study, we formulate personalized multimodal sentiment analysis ({MSA}) as a stream-based active learning problem, where user behaviors are observed sequentially, and we assume that the system has an ability to decide at each step whether to request an emotion label from the user. Simulation experiments using a human{--}agent dialogue corpus demonstrate that the proposed method efficiently improves performance even under few-shot conditions. These results indicate that our approach is effective for developing dialogue systems that achieve cost-efficient personalized {MSA}."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ajichi-etal-2026-adaptive">
<titleInfo>
<title>Adaptive Multimodal Sentiment Analysis with Stream-Based Active Learning for Spoken Dialogue Systems</title>
</titleInfo>
<name type="personal">
<namePart type="given">Atsuto</namePart>
<namePart type="family">Ajichi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Takato</namePart>
<namePart type="family">Hayashi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kazunori</namePart>
<namePart type="family">Komatani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shogo</namePart>
<namePart type="family">Okada</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-02</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 16th International Workshop on Spoken Dialogue System Technology</title>
</titleInfo>
<name type="personal">
<namePart type="given">Giuseppe</namePart>
<namePart type="family">Riccardi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Seyed</namePart>
<namePart type="given">Mahed</namePart>
<namePart type="family">Mousavi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="given">Ines</namePart>
<namePart type="family">Torres</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Koichiro</namePart>
<namePart type="family">Yoshino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zoraida</namePart>
<namePart type="family">Callejas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shammur</namePart>
<namePart type="given">Absar</namePart>
<namePart type="family">Chowdhury</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Frederic</namePart>
<namePart type="family">Bechet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joakim</namePart>
<namePart type="family">Gustafson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Géraldine</namePart>
<namePart type="family">Damnati</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alex</namePart>
<namePart type="family">Papangelis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luis</namePart>
<namePart type="given">Fernando</namePart>
<namePart type="family">D’Haro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">John</namePart>
<namePart type="family">Mendonça</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Raffaella</namePart>
<namePart type="family">Bernardi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dilek</namePart>
<namePart type="family">Hakkani-Tur</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Giuseppe</namePart>
<namePart type="given">”Pino”</namePart>
<namePart type="family">Di Fabbrizio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tatsuya</namePart>
<namePart type="family">Kawahara</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Firoj</namePart>
<namePart type="family">Alam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gokhan</namePart>
<namePart type="family">Tur</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michael</namePart>
<namePart type="family">Johnston</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Trento, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In empathic dialogue systems, it is crucial to continuously monitor and adapt to the user’s emotional state. To capture user-specific mappings between multimodal behaviors and emotional states, directly asking users about their emotions during dialogue is the most straightforward and effective approach. However, frequent questioning can cause inconvenience to users and diminish the user experience, so the number of queries should be minimized. In this study, we formulate personalized multimodal sentiment analysis (MSA) as a stream-based active learning problem, where user behaviors are observed sequentially, and we assume that the system has an ability to decide at each step whether to request an emotion label from the user. Simulation experiments using a human–agent dialogue corpus demonstrate that the proposed method efficiently improves performance even under few-shot conditions. These results indicate that our approach is effective for developing dialogue systems that achieve cost-efficient personalized MSA.</abstract>
<identifier type="citekey">ajichi-etal-2026-adaptive</identifier>
<location>
<url>https://aclanthology.org/2026.iwsds-1.33/</url>
</location>
<part>
<date>2026-02</date>
<extent unit="page">
<start>326</start>
<end>337</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Adaptive Multimodal Sentiment Analysis with Stream-Based Active Learning for Spoken Dialogue Systems
%A Ajichi, Atsuto
%A Hayashi, Takato
%A Komatani, Kazunori
%A Okada, Shogo
%Y Riccardi, Giuseppe
%Y Mousavi, Seyed Mahed
%Y Torres, Maria Ines
%Y Yoshino, Koichiro
%Y Callejas, Zoraida
%Y Chowdhury, Shammur Absar
%Y Chen, Yun-Nung
%Y Bechet, Frederic
%Y Gustafson, Joakim
%Y Damnati, Géraldine
%Y Papangelis, Alex
%Y D’Haro, Luis Fernando
%Y Mendonça, John
%Y Bernardi, Raffaella
%Y Hakkani-Tur, Dilek
%Y Di Fabbrizio, Giuseppe “Pino”
%Y Kawahara, Tatsuya
%Y Alam, Firoj
%Y Tur, Gokhan
%Y Johnston, Michael
%S Proceedings of the 16th International Workshop on Spoken Dialogue System Technology
%D 2026
%8 February
%I Association for Computational Linguistics
%C Trento, Italy
%F ajichi-etal-2026-adaptive
%X In empathic dialogue systems, it is crucial to continuously monitor and adapt to the user’s emotional state. To capture user-specific mappings between multimodal behaviors and emotional states, directly asking users about their emotions during dialogue is the most straightforward and effective approach. However, frequent questioning can cause inconvenience to users and diminish the user experience, so the number of queries should be minimized. In this study, we formulate personalized multimodal sentiment analysis (MSA) as a stream-based active learning problem, where user behaviors are observed sequentially, and we assume that the system has an ability to decide at each step whether to request an emotion label from the user. Simulation experiments using a human–agent dialogue corpus demonstrate that the proposed method efficiently improves performance even under few-shot conditions. These results indicate that our approach is effective for developing dialogue systems that achieve cost-efficient personalized MSA.
%U https://aclanthology.org/2026.iwsds-1.33/
%P 326-337
Markdown (Informal)
[Adaptive Multimodal Sentiment Analysis with Stream-Based Active Learning for Spoken Dialogue Systems](https://aclanthology.org/2026.iwsds-1.33/) (Ajichi et al., IWSDS 2026)
ACL

Atsuto Ajichi, Takato Hayashi, Kazunori Komatani, and Shogo Okada. 2026. [Adaptive Multimodal Sentiment Analysis with Stream-Based Active Learning for Spoken Dialogue Systems](https://aclanthology.org/2026.iwsds-1.33/). In *Proceedings of the 16th International Workshop on Spoken Dialogue System Technology*, pages 326–337, Trento, Italy. Association for Computational Linguistics.