% Conference-paper record for the ACL Anthology item 2026.iwslt-1.6.
% Case-protected words in the title are braced as whole words so that
% sentence-casing styles keep their capitals without breaking kerning.
% NOTE(review): `address` carries the conference venue string as exported;
% confirm whether the target style expects the publisher's city there instead.
% NOTE(review): the abstract uses \textrightarrow, which presumably needs
% textcomp (or a recent LaTeX kernel) if the abstract is ever typeset.
@inproceedings{sonawane-afli-2026-adapt,
  title     = {{ADAPT--MTU} {HAI} at {IWSLT}2026: Robust Cascaded Speech Translation for {Bhojpuri}--{Hindi} and {Irish}--{English}},
  author    = {Sonawane, Pournima and
               Afli, Haithem},
  editor    = {Salesky, Elizabeth and
               Anastasopoulos, Antonios and
               Negri, Matteo and
               Federico, Marcello},
  booktitle = {Proceedings of the 23rd International Conference on Spoken Language Translation ({IWSLT} 2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, USA (in-person and online)},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.iwslt-1.6/},
  pages     = {58--67},
  isbn      = {979-8-89176-411-8},
  abstract  = {Low-resource speech translation remains challenging due to limited data, weak ASR support, and error propagation in cascaded systems. We present the ADAPT{--}MTU HAI submission to the IWSLT 2026 Low-Resource Speech Translation task, a robust cascaded framework combining Whisper-based ASR and NLLB-200 multilingual translation for Bhojpuri{\textrightarrow}Hindi and Irish{\textrightarrow}English language pairs. We evaluate multiple ASR models and routing strategies, including direct and pivot-based translation. For Bhojpuri{\textrightarrow}Hindi, the best configuration (Whisper-large-v3 and direct NLLB) achieves BLEU 25.59, chrF++ 42.48, and TER 63.83 on the full development set, outperforming pivot and copy baselines. For Irish{\textrightarrow}English, replacing Whisper with a language-specific Wav2Vec2 ASR model improves ASR coverage from 94.8{\%} to 100{\%} on the test set while maintaining low repetition rates. Our findings highlight the critical role of ASR quality in downstream translation performance, the conditional benefits of pivot translation, and the effectiveness of modular cascaded architectures for low-resource speech translation.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record describing one conference paper. -->
  <mods ID="sonawane-afli-2026-adapt">
    <titleInfo>
      <title>ADAPT–MTU HAI at IWSLT2026: Robust Cascaded Speech Translation for Bhojpuri–Hindi and Irish–English</title>
    </titleInfo>
    <!-- Paper authors, in listed order. -->
    <name type="personal">
      <namePart type="given">Pournima</namePart>
      <namePart type="family">Sonawane</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Haithem</namePart>
      <namePart type="family">Afli</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume that contains this paper. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 23rd International Conference on Spoken Language Translation (IWSLT 2026)</title>
      </titleInfo>
      <!-- Proceedings editors, in listed order. -->
      <name type="personal">
        <namePart type="given">Elizabeth</namePart>
        <namePart type="family">Salesky</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Antonios</namePart>
        <namePart type="family">Anastasopoulos</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Matteo</namePart>
        <namePart type="family">Negri</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Marcello</namePart>
        <namePart type="family">Federico</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, USA (in-person and online)</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-411-8</identifier>
    </relatedItem>
    <abstract>Low-resource speech translation remains challenging due to limited data, weak ASR support, and error propagation in cascaded systems. We present the ADAPT–MTU HAI submission to the IWSLT 2026 Low-Resource Speech Translation task, a robust cascaded framework combining Whisper-based ASR and NLLB-200 multilingual translation for Bhojpuri→Hindi and Irish→English language pairs. We evaluate multiple ASR models and routing strategies, including direct and pivot-based translation. For Bhojpuri→Hindi, the best configuration (Whisper-large-v3 and direct NLLB) achieves BLEU 25.59, chrF++ 42.48, and TER 63.83 on the full development set, outperforming pivot and copy baselines. For Irish→English, replacing Whisper with a language-specific Wav2Vec2 ASR model improves ASR coverage from 94.8% to 100% on the test set while maintaining low repetition rates. Our findings highlight the critical role of ASR quality in downstream translation performance, the conditional benefits of pivot translation, and the effectiveness of modular cascaded architectures for low-resource speech translation.</abstract>
    <identifier type="citekey">sonawane-afli-2026-adapt</identifier>
    <location>
      <url>https://aclanthology.org/2026.iwslt-1.6/</url>
    </location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>58</start>
        <end>67</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T ADAPT–MTU HAI at IWSLT2026: Robust Cascaded Speech Translation for Bhojpuri–Hindi and Irish–English
%A Sonawane, Pournima
%A Afli, Haithem
%Y Salesky, Elizabeth
%Y Anastasopoulos, Antonios
%Y Negri, Matteo
%Y Federico, Marcello
%S Proceedings of the 23rd International Conference on Spoken Language Translation (IWSLT 2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, USA (in-person and online)
%@ 979-8-89176-411-8
%F sonawane-afli-2026-adapt
%X Low-resource speech translation remains challenging due to limited data, weak ASR support, and error propagation in cascaded systems. We present the ADAPT–MTU HAI submission to the IWSLT 2026 Low-Resource Speech Translation task, a robust cascaded framework combining Whisper-based ASR and NLLB-200 multilingual translation for Bhojpuri→Hindi and Irish→English language pairs. We evaluate multiple ASR models and routing strategies, including direct and pivot-based translation. For Bhojpuri→Hindi, the best configuration (Whisper-large-v3 and direct NLLB) achieves BLEU 25.59, chrF++ 42.48, and TER 63.83 on the full development set, outperforming pivot and copy baselines. For Irish→English, replacing Whisper with a language-specific Wav2Vec2 ASR model improves ASR coverage from 94.8% to 100% on the test set while maintaining low repetition rates. Our findings highlight the critical role of ASR quality in downstream translation performance, the conditional benefits of pivot translation, and the effectiveness of modular cascaded architectures for low-resource speech translation.
%U https://aclanthology.org/2026.iwslt-1.6/
%P 58-67
Markdown (Informal)
[ADAPT–MTU HAI at IWSLT2026: Robust Cascaded Speech Translation for Bhojpuri–Hindi and Irish–English](https://aclanthology.org/2026.iwslt-1.6/) (Sonawane & Afli, IWSLT 2026)
ACL