% Conference paper record. The proceedings title uses a plain colon (the
% escaped-brace form typeset literal braces around it), and the proper noun
% in the paper title is brace-protected against sentence-casing styles.
@inproceedings{mondal-etal-2025-quantum,
    title = "Quantum-Infused {Whisper}: A Framework for Replacing Classical Components",
    author = "Mondal, Tapabrata and
      Dhar, Debjit and
      Lahiri, Soham and
      Bandyopadhyay, Sivaji",
    editor = "Pal, Santanu and
      Pakray, Partha and
      Jain, Priyanka and
      Ekbal, Asif and
      Bandyopadhyay, Sivaji",
    booktitle = "Proceedings of the QuantumNLP: Integrating Quantum Computing with Natural Language Processing",
    month = nov,
    year = "2025",
    address = "Mumbai, India (Hybrid)",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2025.quantumnlp-1.1/",
    pages = "1--5",
    isbn = "979-8-89176-306-7",
    abstract = "We propose a compact hybrid quantum{--}classical extension of OpenAI{'}s Whisper in which classical components are replaced by Quantum Convolutional Neural Networks (QCNN), Quantum LSTMs (QLSTM), and optional Quantum Adaptive Self-Attention (QASA). Log-mel spectrograms are angle encoded and processed by QCNN kernels, whose outputs feed a Transformer encoder, while QLSTM-based decoding introduces quantum-enhanced temporal modeling. The design incorporates pretrained acoustic embeddings and is constrained to NISQ-feasible circuit depths and qubit counts. Although this work is primarily architectural, we provide a fully specified, reproducible evaluation plan using Speech Commands, LibriSpeech, and Common Voice, along with strong classical baselines and measurable hypotheses for assessing noise robustness, efficiency, and parameter sparsity. To our knowledge, this is the first hardware-aware, module-wise quantum replacement framework for Whisper."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mondal-etal-2025-quantum">
<titleInfo>
<title>Quantum-Infused Whisper: A Framework for Replacing Classical Components</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tapabrata</namePart>
<namePart type="family">Mondal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Debjit</namePart>
<namePart type="family">Dhar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Soham</namePart>
<namePart type="family">Lahiri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sivaji</namePart>
<namePart type="family">Bandyopadhyay</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the QuantumNLP: Integrating Quantum Computing with Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Santanu</namePart>
<namePart type="family">Pal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Partha</namePart>
<namePart type="family">Pakray</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Priyanka</namePart>
<namePart type="family">Jain</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asif</namePart>
<namePart type="family">Ekbal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sivaji</namePart>
<namePart type="family">Bandyopadhyay</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mumbai, India (Hybrid)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-306-7</identifier>
</relatedItem>
<abstract>We propose a compact hybrid quantum–classical extension of OpenAI’s Whisper in which classical components are replaced by Quantum Convolutional Neural Networks (QCNN), Quantum LSTMs (QLSTM), and optional Quantum Adaptive Self-Attention (QASA). Log-mel spectrograms are angle encoded and processed by QCNN kernels, whose outputs feed a Transformer encoder, while QLSTM-based decoding introduces quantum-enhanced temporal modeling. The design incorporates pretrained acoustic embeddings and is constrained to NISQ-feasible circuit depths and qubit counts. Although this work is primarily architectural, we provide a fully specified, reproducible evaluation plan using Speech Commands, LibriSpeech, and Common Voice, along with strong classical baselines and measurable hypotheses for assessing noise robustness, efficiency, and parameter sparsity. To our knowledge, this is the first hardware-aware, module-wise quantum replacement framework for Whisper.</abstract>
<identifier type="citekey">mondal-etal-2025-quantum</identifier>
<location>
<url>https://aclanthology.org/2025.quantumnlp-1.1/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>1</start>
<end>5</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Quantum-Infused Whisper: A Framework for Replacing Classical Components
%A Mondal, Tapabrata
%A Dhar, Debjit
%A Lahiri, Soham
%A Bandyopadhyay, Sivaji
%Y Pal, Santanu
%Y Pakray, Partha
%Y Jain, Priyanka
%Y Ekbal, Asif
%Y Bandyopadhyay, Sivaji
%S Proceedings of the QuantumNLP: Integrating Quantum Computing with Natural Language Processing
%D 2025
%8 November
%I Association for Computational Linguistics
%C Mumbai, India (Hybrid)
%@ 979-8-89176-306-7
%F mondal-etal-2025-quantum
%X We propose a compact hybrid quantum–classical extension of OpenAI’s Whisper in which classical components are replaced by Quantum Convolutional Neural Networks (QCNN), Quantum LSTMs (QLSTM), and optional Quantum Adaptive Self-Attention (QASA). Log-mel spectrograms are angle encoded and processed by QCNN kernels, whose outputs feed a Transformer encoder, while QLSTM-based decoding introduces quantum-enhanced temporal modeling. The design incorporates pretrained acoustic embeddings and is constrained to NISQ-feasible circuit depths and qubit counts. Although this work is primarily architectural, we provide a fully specified, reproducible evaluation plan using Speech Commands, LibriSpeech, and Common Voice, along with strong classical baselines and measurable hypotheses for assessing noise robustness, efficiency, and parameter sparsity. To our knowledge, this is the first hardware-aware, module-wise quantum replacement framework for Whisper.
%U https://aclanthology.org/2025.quantumnlp-1.1/
%P 1-5
Markdown (Informal)
[Quantum-Infused Whisper: A Framework for Replacing Classical Components](https://aclanthology.org/2025.quantumnlp-1.1/) (Mondal et al., QuantumNLP 2025)
ACL