% BibTeX record (citation key: tan-etal-2025-streaming).
% The closing brace sits on its own line so the XML declaration of the
% following MODS record starts on a fresh line.
@inproceedings{tan-etal-2025-streaming,
    title     = {Streaming Sequence Transduction through Dynamic Compression},
    author    = {Tan, Weiting and
                 Chen, Yunmo and
                 Chen, Tongfei and
                 Qin, Guanghui and
                 Xu, Haoran and
                 Zhang, Chenyu and
                 Van Durme, Benjamin and
                 Koehn, Philipp},
    editor    = {Salesky, Elizabeth and
                 Federico, Marcello and
                 Anastasopoulos, Antonis},
    booktitle = {Proceedings of the 22nd International Conference on Spoken Language Translation (IWSLT 2025)},
    month     = jul,
    year      = {2025},
    address   = {Vienna, Austria (in-person and online)},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.iwslt-1.1/},
    doi       = {10.18653/v1/2025.iwslt-1.1},
    pages     = {1--18},
    isbn      = {979-8-89176-272-5},
    abstract  = {We introduce STAR (Stream Transduction with Anchor Representations), a novel Transformer-based model designed for efficient sequence-to-sequence transduction over streams. STAR dynamically segments input streams to create compressed anchor representations, achieving nearly lossless (12x) compression in Automatic Speech Recognition (ASR) and outperforming existing methods. Moreover, STAR demonstrates superior segmentation and latency-quality trade-offs in simultaneous Speech Translation, optimizing latency, memory footprint, and quality.}
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS record for citation key tan-etal-2025-streaming: paper title, eight authors,
       host proceedings (with three editors), abstract, identifiers and page extent. -->
  <mods ID="tan-etal-2025-streaming">
    <titleInfo>
      <title>Streaming Sequence Transduction through Dynamic Compression</title>
    </titleInfo>
    <!-- Authors, in the order given by the record -->
    <name type="personal">
      <namePart type="given">Weiting</namePart>
      <namePart type="family">Tan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yunmo</namePart>
      <namePart type="family">Chen</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Tongfei</namePart>
      <namePart type="family">Chen</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Guanghui</namePart>
      <namePart type="family">Qin</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Haoran</namePart>
      <namePart type="family">Xu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Chenyu</namePart>
      <namePart type="family">Zhang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Benjamin</namePart>
      <namePart type="family">Van Durme</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Philipp</namePart>
      <namePart type="family">Koehn</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume containing the paper -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 22nd International Conference on Spoken Language Translation (IWSLT 2025)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Elizabeth</namePart>
        <namePart type="family">Salesky</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Marcello</namePart>
        <namePart type="family">Federico</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Antonis</namePart>
        <namePart type="family">Anastasopoulos</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Vienna, Austria (in-person and online)</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-272-5</identifier>
    </relatedItem>
    <abstract>We introduce STAR (Stream Transduction with Anchor Representations), a novel Transformer-based model designed for efficient sequence-to-sequence transduction over streams. STAR dynamically segments input streams to create compressed anchor representations, achieving nearly lossless (12x) compression in Automatic Speech Recognition (ASR) and outperforming existing methods. Moreover, STAR demonstrates superior segmentation and latency-quality trade-offs in simultaneous Speech Translation, optimizing latency, memory footprint, and quality.</abstract>
    <identifier type="citekey">tan-etal-2025-streaming</identifier>
    <identifier type="doi">10.18653/v1/2025.iwslt-1.1</identifier>
    <location>
      <url>https://aclanthology.org/2025.iwslt-1.1/</url>
    </location>
    <part>
      <date>2025-07</date>
      <extent unit="page">
        <start>1</start>
        <end>18</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Streaming Sequence Transduction through Dynamic Compression
%A Tan, Weiting
%A Chen, Yunmo
%A Chen, Tongfei
%A Qin, Guanghui
%A Xu, Haoran
%A Zhang, Chenyu
%A Van Durme, Benjamin
%A Koehn, Philipp
%Y Salesky, Elizabeth
%Y Federico, Marcello
%Y Anastasopoulos, Antonis
%S Proceedings of the 22nd International Conference on Spoken Language Translation (IWSLT 2025)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria (in-person and online)
%@ 979-8-89176-272-5
%F tan-etal-2025-streaming
%X We introduce STAR (Stream Transduction with Anchor Representations), a novel Transformer-based model designed for efficient sequence-to-sequence transduction over streams. STAR dynamically segments input streams to create compressed anchor representations, achieving nearly lossless (12x) compression in Automatic Speech Recognition (ASR) and outperforming existing methods. Moreover, STAR demonstrates superior segmentation and latency-quality trade-offs in simultaneous Speech Translation, optimizing latency, memory footprint, and quality.
%R 10.18653/v1/2025.iwslt-1.1
%U https://aclanthology.org/2025.iwslt-1.1/
%U https://doi.org/10.18653/v1/2025.iwslt-1.1
%P 1-18
Markdown (Informal)
[Streaming Sequence Transduction through Dynamic Compression](https://aclanthology.org/2025.iwslt-1.1/) (Tan et al., IWSLT 2025)
ACL
- Weiting Tan, Yunmo Chen, Tongfei Chen, Guanghui Qin, Haoran Xu, Chenyu Zhang, Benjamin Van Durme, and Philipp Koehn. 2025. Streaming Sequence Transduction through Dynamic Compression. In Proceedings of the 22nd International Conference on Spoken Language Translation (IWSLT 2025), pages 1–18, Vienna, Austria (in-person and online). Association for Computational Linguistics.