@inproceedings{conti-etal-2025-unheard,
title = "The Unheard Alternative: Contrastive Explanations for Speech-to-Text Models",
author = "Conti, Lina and
Fucci, Dennis and
Gaido, Marco and
Negri, Matteo and
Wisniewski, Guillaume and
Bentivogli, Luisa",
editor = "Belinkov, Yonatan and
Mueller, Aaron and
Kim, Najoung and
Mohebbi, Hosein and
Chen, Hanjie and
Arad, Dana and
Sarti, Gabriele",
booktitle = "Proceedings of the 8th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.blackboxnlp-1.23/",
pages = "398--414",
ISBN = "979-8-89176-346-3",
abstract = "Contrastive explanations, which indicate why an AI system produced one output (the target) instead of another (the foil), are widely recognized in explainable AI as more informative and interpretable than standard explanations. However, obtaining such explanations for speech-to-text (S2T) generative models remains an open challenge. Adopting a feature attribution framework, we propose the first method to obtain contrastive explanations in S2T by analyzing how specific regions of the input spectrogram influence the choice between alternative outputs. Through a case study on gender translation in speech translation, we show that our method accurately identifies the audio features that drive the selection of one gender over another."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="conti-etal-2025-unheard">
<titleInfo>
<title>The Unheard Alternative: Contrastive Explanations for Speech-to-Text Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lina</namePart>
<namePart type="family">Conti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dennis</namePart>
<namePart type="family">Fucci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marco</namePart>
<namePart type="family">Gaido</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matteo</namePart>
<namePart type="family">Negri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Guillaume</namePart>
<namePart type="family">Wisniewski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luisa</namePart>
<namePart type="family">Bentivogli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 8th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yonatan</namePart>
<namePart type="family">Belinkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aaron</namePart>
<namePart type="family">Mueller</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Najoung</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hosein</namePart>
<namePart type="family">Mohebbi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hanjie</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dana</namePart>
<namePart type="family">Arad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gabriele</namePart>
<namePart type="family">Sarti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-346-3</identifier>
</relatedItem>
<abstract>Contrastive explanations, which indicate why an AI system produced one output (the target) instead of another (the foil), are widely recognized in explainable AI as more informative and interpretable than standard explanations. However, obtaining such explanations for speech-to-text (S2T) generative models remains an open challenge. Adopting a feature attribution framework, we propose the first method to obtain contrastive explanations in S2T by analyzing how specific regions of the input spectrogram influence the choice between alternative outputs. Through a case study on gender translation in speech translation, we show that our method accurately identifies the audio features that drive the selection of one gender over another.</abstract>
<identifier type="citekey">conti-etal-2025-unheard</identifier>
<location>
<url>https://aclanthology.org/2025.blackboxnlp-1.23/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>398</start>
<end>414</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T The Unheard Alternative: Contrastive Explanations for Speech-to-Text Models
%A Conti, Lina
%A Fucci, Dennis
%A Gaido, Marco
%A Negri, Matteo
%A Wisniewski, Guillaume
%A Bentivogli, Luisa
%Y Belinkov, Yonatan
%Y Mueller, Aaron
%Y Kim, Najoung
%Y Mohebbi, Hosein
%Y Chen, Hanjie
%Y Arad, Dana
%Y Sarti, Gabriele
%S Proceedings of the 8th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-346-3
%F conti-etal-2025-unheard
%X Contrastive explanations, which indicate why an AI system produced one output (the target) instead of another (the foil), are widely recognized in explainable AI as more informative and interpretable than standard explanations. However, obtaining such explanations for speech-to-text (S2T) generative models remains an open challenge. Adopting a feature attribution framework, we propose the first method to obtain contrastive explanations in S2T by analyzing how specific regions of the input spectrogram influence the choice between alternative outputs. Through a case study on gender translation in speech translation, we show that our method accurately identifies the audio features that drive the selection of one gender over another.
%U https://aclanthology.org/2025.blackboxnlp-1.23/
%P 398-414
Markdown (Informal)
[The Unheard Alternative: Contrastive Explanations for Speech-to-Text Models](https://aclanthology.org/2025.blackboxnlp-1.23/) (Conti et al., BlackboxNLP 2025)
ACL
Lina Conti, Dennis Fucci, Marco Gaido, Matteo Negri, Guillaume Wisniewski, and Luisa Bentivogli. 2025. [The Unheard Alternative: Contrastive Explanations for Speech-to-Text Models](https://aclanthology.org/2025.blackboxnlp-1.23/). In Proceedings of the 8th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP, pages 398–414, Suzhou, China. Association for Computational Linguistics.
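
The abstract describes contrastive feature attribution over input spectrograms, i.e., explaining why the model chose the target output token rather than a foil. As a rough illustrative sketch only (not the paper's actual method), one common way to realize this idea is to differentiate the log-probability margin between the target and foil tokens with respect to the spectrogram; the `model` interface, token IDs, and the plain-gradient attribution below are all assumptions made for illustration.

```python
import torch

def contrastive_saliency(model, spectrogram, prefix_ids, target_id, foil_id):
    # Hypothetical sketch: attribute the target-vs-foil choice to regions of the
    # input spectrogram via the gradient of the contrastive log-probability margin.
    spec = spectrogram.clone().detach().requires_grad_(True)

    # Assumed interface: the S2T model returns next-token logits of shape
    # (batch, prefix_len, vocab) given the audio features and the decoded prefix.
    logits = model(spec.unsqueeze(0), prefix_ids.unsqueeze(0))[0, -1]
    log_probs = torch.log_softmax(logits, dim=-1)

    # Contrastive score: how strongly the model prefers the target token
    # (e.g., the masculine form) over the foil (e.g., the feminine form).
    contrast = log_probs[target_id] - log_probs[foil_id]
    contrast.backward()

    # Saliency map over the time-frequency bins of the spectrogram.
    return spec.grad.abs()
```

In a gender-translation case study like the one in the abstract, `target_id` and `foil_id` would be the vocabulary indices of the two gender-marked alternatives, and the returned map highlights which spectrogram regions drive the choice between them.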