% BibTeX record for the paper published at https://aclanthology.org/2026.findings-acl.350/
% Words that must keep their capitalisation are protected with whole-word braces,
% so styles that sentence-case titles leave them intact.
@inproceedings{zhang-etal-2026-generative,
  title     = {Generative-to-Discriminative Test-Time Adaptation via Manifold-Aware Diffusion and {Bayesian} Distillation},
  author    = {Zhang, Boyun and
               Xie, Zequn and
               Feng, Fangming and
               Zhang, Zihan and
               He, Yongbo and
               Wang, Chuxin and
               Cai, Sihang and
               Jin, Tao and
               Zhang, Qifei},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.findings-acl.350/},
  pages     = {7052--7062},
  isbn      = {979-8-89176-395-1},
  abstract  = {Multimodal Sentiment Analysis (MSA) models typically suffer significant performance degradation under domain shifts. While Test-Time Adaptation (TTA) aims to mitigate this, existing discriminative approaches often succumb to ``confident but wrong'' predictions on out-of-distribution samples. Conversely, generative models offer robust calibration but incur prohibitive computational costs. To bridge this gap, we propose GD-Adapt (Generative-Discriminative Adaptation), a novel TTA framework that harmonizes the robustness of generative diffusion models with the efficiency of discriminative regression networks via Bayesian Diffusion Distillation (BDD). Specifically, we introduce Auxiliary Generative Regularization (AGR) during pretraining to enforce manifold-aware feature learning. Extensive experiments across five cross-domain scenarios demonstrate our method{'}s superiority. For instance, on the challenging MOSI to SIMS shift, GD-Adapt reduces Mean Absolute Error (MAE) from 0.6872 to 0.5673 and boosts binary accuracy by 5.81 percentage points (reaching 57.33{\%}). Notably, in scenarios such as SIMS to MOSI, we achieve an 11.18-point gain over the non-adapted baseline.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS record for a single conference paper; the ID matches the citekey identifier below. -->
  <mods ID="zhang-etal-2026-generative">
    <titleInfo>
      <title>Generative-to-Discriminative Test-Time Adaptation via Manifold-Aware Diffusion and Bayesian Distillation</title>
    </titleInfo>

    <!-- Paper authors, one <name> element per person. -->
    <name type="personal">
      <namePart type="given">Boyun</namePart>
      <namePart type="family">Zhang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Zequn</namePart>
      <namePart type="family">Xie</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Fangming</namePart>
      <namePart type="family">Feng</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Zihan</namePart>
      <namePart type="family">Zhang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yongbo</namePart>
      <namePart type="family">He</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Chuxin</namePart>
      <namePart type="family">Wang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sihang</namePart>
      <namePart type="family">Cai</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Tao</namePart>
      <namePart type="family">Jin</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Qifei</namePart>
      <namePart type="family">Zhang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>

    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume containing the paper, with its editors, publisher, place and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: ACL 2026</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Maria</namePart>
        <namePart type="family">Liakata</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Viviane</namePart>
        <namePart type="given">P</namePart>
        <namePart type="family">Moreira</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jiajun</namePart>
        <namePart type="family">Zhang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">David</namePart>
        <namePart type="family">Jurgens</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, California, United States</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-395-1</identifier>
    </relatedItem>

    <abstract>Multimodal Sentiment Analysis (MSA) models typically suffer significant performance degradation under domain shifts. While Test-Time Adaptation (TTA) aims to mitigate this, existing discriminative approaches often succumb to “confident but wrong” predictions on out-of-distribution samples. Conversely, generative models offer robust calibration but incur prohibitive computational costs. To bridge this gap, we propose GD-Adapt (Generative-Discriminative Adaptation), a novel TTA framework that harmonizes the robustness of generative diffusion models with the efficiency of discriminative regression networks via Bayesian Diffusion Distillation (BDD). Specifically, we introduce Auxiliary Generative Regularization (AGR) during pretraining to enforce manifold-aware feature learning. Extensive experiments across five cross-domain scenarios demonstrate our method’s superiority. For instance, on the challenging MOSI to SIMS shift, GD-Adapt reduces Mean Absolute Error (MAE) from 0.6872 to 0.5673 and boosts binary accuracy by 5.81 percentage points (reaching 57.33%). Notably, in scenarios such as SIMS to MOSI, we achieve an 11.18-point gain over the non-adapted baseline.</abstract>
    <identifier type="citekey">zhang-etal-2026-generative</identifier>
    <location>
      <url>https://aclanthology.org/2026.findings-acl.350/</url>
    </location>

    <!-- Position of the paper within the host volume: issue date and page range. -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>7052</start>
        <end>7062</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Generative-to-Discriminative Test-Time Adaptation via Manifold-Aware Diffusion and Bayesian Distillation
%A Zhang, Boyun
%A Xie, Zequn
%A Feng, Fangming
%A Zhang, Zihan
%A He, Yongbo
%A Wang, Chuxin
%A Cai, Sihang
%A Jin, Tao
%A Zhang, Qifei
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Findings of the Association for Computational Linguistics: ACL 2026
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-395-1
%F zhang-etal-2026-generative
%X Multimodal Sentiment Analysis (MSA) models typically suffer significant performance degradation under domain shifts. While Test-Time Adaptation (TTA) aims to mitigate this, existing discriminative approaches often succumb to “confident but wrong” predictions on out-of-distribution samples. Conversely, generative models offer robust calibration but incur prohibitive computational costs. To bridge this gap, we propose GD-Adapt (Generative-Discriminative Adaptation), a novel TTA framework that harmonizes the robustness of generative diffusion models with the efficiency of discriminative regression networks via Bayesian Diffusion Distillation (BDD). Specifically, we introduce Auxiliary Generative Regularization (AGR) during pretraining to enforce manifold-aware feature learning. Extensive experiments across five cross-domain scenarios demonstrate our method’s superiority. For instance, on the challenging MOSI to SIMS shift, GD-Adapt reduces Mean Absolute Error (MAE) from 0.6872 to 0.5673 and boosts binary accuracy by 5.81 percentage points (reaching 57.33%). Notably, in scenarios such as SIMS to MOSI, we achieve an 11.18-point gain over the non-adapted baseline.
%U https://aclanthology.org/2026.findings-acl.350/
%P 7052-7062
Markdown (Informal)
[Generative-to-Discriminative Test-Time Adaptation via Manifold-Aware Diffusion and Bayesian Distillation](https://aclanthology.org/2026.findings-acl.350/) (Zhang et al., Findings 2026)
ACL
- Boyun Zhang, Zequn Xie, Fangming Feng, Zihan Zhang, Yongbo He, Chuxin Wang, Sihang Cai, Tao Jin, and Qifei Zhang. 2026. Generative-to-Discriminative Test-Time Adaptation via Manifold-Aware Diffusion and Bayesian Distillation. In Findings of the Association for Computational Linguistics: ACL 2026, pages 7052–7062, San Diego, California, United States. Association for Computational Linguistics.