% ACL Anthology BibTeX record for the DREAM-S paper (ACL 2026, Volume 1: Long Papers).
% NOTE(review): the fourth author's given name was exported in all caps ("BO"),
% which downstream tools misread as two initials; it is normalised to "Bo" here.
% Confirm against the author list printed on the paper.
@inproceedings{liu-etal-2026-dream,
    title = "{DREAM}-{S}: Speculative Decoding with Searchable Drafting and Target-Aware Refinement for Multimodal Generation",
    author = "Liu, Zining and
      Hu, Yunhai and
      Xia, Tianhua and
      Bao, Bo and
      Sather, Eric and
      Thangarasa, Vithursan and
      Zhang, Sai Qian",
    editor = "Liakata, Maria and
      Moreira, Viviane P. and
      Zhang, Jiajun and
      Jurgens, David",
    booktitle = "Proceedings of the 64th Annual Meeting of the {A}ssociation for {C}omputational {L}inguistics (Volume 1: Long Papers)",
    month = jul,
    year = "2026",
    address = "San Diego, California, United States",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2026.acl-long.2177/",
    doi = "10.18653/v1/2026.acl-long.2177",
    pages = "47031--47045",
    isbn = "979-8-89176-390-6",
    abstract = "Speculative decoding (SD) has proven to be an effective technique for accelerating autoregressive generation in large language models (LLMs), however its application to vision-language models (VLMs) remains relatively unexplored. We propose DREAM-S, a novel SD framework designed specifically for fast and efficient decoding in VLMs. DREAM-S leverages a neural architecture search (NAS) framework with target-aware supernet training to automatically identify both the optimal interaction strategy between the draft and target models, and the most suitable draft model architecture for the underlying hardware implementation platform. DREAM-S additionally incorporates adaptive intermediate feature distillation, guided by attention entropy, to enable efficient draft training. Experiments on a range of well-established VLMs show that DREAM-S achieves up to a 3.85$\times$ speedup compared to standard decoding approaches and significantly outperforms existing SD baselines."
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for citekey liu-etal-2026-dream: seven authors, the host
     proceedings volume with its four editors, the abstract, identifiers
     (citekey, DOI, ISBN), landing-page URL and page extent. -->
<!-- NOTE(review): the fourth author's given name is recorded as the single
     name part "Bo" rather than two one-letter parts; confirm against the
     author list printed on the paper. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="liu-etal-2026-dream">
    <titleInfo>
      <title>DREAM-S: Speculative Decoding with Searchable Drafting and Target-Aware Refinement for Multimodal Generation</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Zining</namePart>
      <namePart type="family">Liu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yunhai</namePart>
      <namePart type="family">Hu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Tianhua</namePart>
      <namePart type="family">Xia</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Bo</namePart>
      <namePart type="family">Bao</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Eric</namePart>
      <namePart type="family">Sather</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Vithursan</namePart>
      <namePart type="family">Thangarasa</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sai</namePart>
      <namePart type="given">Qian</namePart>
      <namePart type="family">Zhang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume this paper appears in. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Maria</namePart>
        <namePart type="family">Liakata</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Viviane</namePart>
        <namePart type="given">P</namePart>
        <namePart type="family">Moreira</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jiajun</namePart>
        <namePart type="family">Zhang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">David</namePart>
        <namePart type="family">Jurgens</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, California, United States</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-390-6</identifier>
    </relatedItem>
    <!-- The multiplication sign is written as a literal character; LaTeX markup has no meaning in MODS text. -->
    <abstract>Speculative decoding (SD) has proven to be an effective technique for accelerating autoregressive generation in large language models (LLMs), however its application to vision-language models (VLMs) remains relatively unexplored. We propose DREAM-S, a novel SD framework designed specifically for fast and efficient decoding in VLMs. DREAM-S leverages a neural architecture search (NAS) framework with target-aware supernet training to automatically identify both the optimal interaction strategy between the draft and target models, and the most suitable draft model architecture for the underlying hardware implementation platform. DREAM-S additionally incorporates adaptive intermediate feature distillation, guided by attention entropy, to enable efficient draft training. Experiments on a range of well-established VLMs show that DREAM-S achieves up to a 3.85× speedup compared to standard decoding approaches and significantly outperforms existing SD baselines.</abstract>
    <identifier type="citekey">liu-etal-2026-dream</identifier>
    <identifier type="doi">10.18653/v1/2026.acl-long.2177</identifier>
    <location>
      <url>https://aclanthology.org/2026.acl-long.2177/</url>
    </location>
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>47031</start>
        <end>47045</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T DREAM-S: Speculative Decoding with Searchable Drafting and Target-Aware Refinement for Multimodal Generation
%A Liu, Zining
%A Hu, Yunhai
%A Xia, Tianhua
%A Bao, Bo
%A Sather, Eric
%A Thangarasa, Vithursan
%A Zhang, Sai Qian
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-390-6
%F liu-etal-2026-dream
%X Speculative decoding (SD) has proven to be an effective technique for accelerating autoregressive generation in large language models (LLMs), however its application to vision-language models (VLMs) remains relatively unexplored. We propose DREAM-S, a novel SD framework designed specifically for fast and efficient decoding in VLMs. DREAM-S leverages a neural architecture search (NAS) framework with target-aware supernet training to automatically identify both the optimal interaction strategy between the draft and target models, and the most suitable draft model architecture for the underlying hardware implementation platform. DREAM-S additionally incorporates adaptive intermediate feature distillation, guided by attention entropy, to enable efficient draft training. Experiments on a range of well-established VLMs show that DREAM-S achieves up to a 3.85× speedup compared to standard decoding approaches and significantly outperforms existing SD baselines.
%R 10.18653/v1/2026.acl-long.2177
%U https://aclanthology.org/2026.acl-long.2177/
%U https://doi.org/10.18653/v1/2026.acl-long.2177
%P 47031-47045
Markdown (Informal)
[DREAM-S: Speculative Decoding with Searchable Drafting and Target-Aware Refinement for Multimodal Generation](https://aclanthology.org/2026.acl-long.2177/) (Liu et al., ACL 2026)
ACL
- Zining Liu, Yunhai Hu, Tianhua Xia, Bo Bao, Eric Sather, Vithursan Thangarasa, and Sai Qian Zhang. 2026. DREAM-S: Speculative Decoding with Searchable Drafting and Target-Aware Refinement for Multimodal Generation. In Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 47031–47045, San Diego, California, United States. Association for Computational Linguistics.