% Pinch-AST system paper, IWSLT 2026 (ACL Anthology ID 2026.iwslt-1.30).
% The abstract uses LaTeX escapes ({\textrightarrow}, {--}) for arrows and en dashes.
@inproceedings{bentes-safka-2026-pinch,
  title     = {Pinch-{AST}: Robust Cascaded Speech Translation System for the {IWSLT} 2026 Simultaneous Speech Translation Task},
  author    = {Bentes, Carlos and
               Safka, Christian},
  editor    = {Salesky, Elizabeth and
               Anastasopoulos, Antonios and
               Negri, Matteo and
               Federico, Marcello},
  booktitle = {Proceedings of the 23rd International Conference on Spoken Language Translation ({IWSLT} 2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, USA (in-person and online)},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.iwslt-1.30/},
  pages     = {268--271},
  isbn      = {979-8-89176-411-8},
  abstract  = {We describe Pinch-AST, our submission to the IWSLT 2026 Simultaneous Speech-to-Text Translation shared task, covering all four official directions (En {\textrightarrow} De, En {\textrightarrow} It, En {\textrightarrow} Zh, Cs {\textrightarrow} En) under both low- and high-latency regimes. Pinch-AST is a cascaded system pairing off-the-shelf speech models with a translation backbone adapted per language pair via LoRA on ASR-noise-augmented parallel data. The streaming policy is a character-level longest-common-prefix re-translation strategy, and the full pipeline runs on a single H100 80 GB GPU within the real-time budget. Evaluated on the IWSLT 2026 development set, Pinch-AST achieves competitive quality{--}latency trade-offs across all four language pairs in both latency regimes.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the Pinch-AST paper (citekey bentes-safka-2026-pinch).
     The host relatedItem describes the IWSLT 2026 proceedings volume. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="bentes-safka-2026-pinch">
    <titleInfo>
      <title>Pinch-AST: Robust Cascaded Speech Translation System for the IWSLT 2026 Simultaneous Speech Translation Task</title>
    </titleInfo>
    <!-- Paper authors -->
    <name type="personal">
      <namePart type="given">Carlos</namePart>
      <namePart type="family">Bentes</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Christian</namePart>
      <namePart type="family">Safka</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Containing proceedings volume and its editors -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 23rd International Conference on Spoken Language Translation (IWSLT 2026)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Elizabeth</namePart>
        <namePart type="family">Salesky</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Antonios</namePart>
        <namePart type="family">Anastasopoulos</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Matteo</namePart>
        <namePart type="family">Negri</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Marcello</namePart>
        <namePart type="family">Federico</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, USA (in-person and online)</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-411-8</identifier>
    </relatedItem>
    <abstract>We describe Pinch-AST, our submission to the IWSLT 2026 Simultaneous Speech-to-Text Translation shared task, covering all four official directions (En → De, En → It, En → Zh, Cs → En) under both low- and high-latency regimes. Pinch-AST is a cascaded system pairing off-the-shelf speech models with a translation backbone adapted per language pair via LoRA on ASR-noise-augmented parallel data. The streaming policy is a character-level longest-common-prefix re-translation strategy, and the full pipeline runs on a single H100 80 GB GPU within the real-time budget. Evaluated on the IWSLT 2026 development set, Pinch-AST achieves competitive quality–latency trade-offs across all four language pairs in both latency regimes.</abstract>
    <identifier type="citekey">bentes-safka-2026-pinch</identifier>
    <location>
      <url>https://aclanthology.org/2026.iwslt-1.30/</url>
    </location>
    <!-- Position of the paper within the host volume -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>268</start>
        <end>271</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Pinch-AST: Robust Cascaded Speech Translation System for the IWSLT 2026 Simultaneous Speech Translation Task
%A Bentes, Carlos
%A Safka, Christian
%Y Salesky, Elizabeth
%Y Anastasopoulos, Antonios
%Y Negri, Matteo
%Y Federico, Marcello
%S Proceedings of the 23rd International Conference on Spoken Language Translation (IWSLT 2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, USA (in-person and online)
%@ 979-8-89176-411-8
%F bentes-safka-2026-pinch
%X We describe Pinch-AST, our submission to the IWSLT 2026 Simultaneous Speech-to-Text Translation shared task, covering all four official directions (En → De, En → It, En → Zh, Cs → En) under both low- and high-latency regimes. Pinch-AST is a cascaded system pairing off-the-shelf speech models with a translation backbone adapted per language pair via LoRA on ASR-noise-augmented parallel data. The streaming policy is a character-level longest-common-prefix re-translation strategy, and the full pipeline runs on a single H100 80 GB GPU within the real-time budget. Evaluated on the IWSLT 2026 development set, Pinch-AST achieves competitive quality–latency trade-offs across all four language pairs in both latency regimes.
%U https://aclanthology.org/2026.iwslt-1.30/
%P 268-271
Markdown (Informal)
[Pinch-AST: Robust Cascaded Speech Translation System for the IWSLT 2026 Simultaneous Speech Translation Task](https://aclanthology.org/2026.iwslt-1.30/) (Bentes & Safka, IWSLT 2026)
ACL