@inproceedings{polak-etal-2023-towards,
title = "Towards Efficient Simultaneous Speech Translation: {CUNI}-{KIT} System for Simultaneous Track at {IWSLT} 2023",
author = "Pol{\'a}k, Peter and
Liu, Danni and
Pham, Ngoc-Quan and
Niehues, Jan and
Waibel, Alexander and
Bojar, Ond{\v{r}}ej",
editor = "Salesky, Elizabeth and
Federico, Marcello and
Carpuat, Marine",
booktitle = "Proceedings of the 20th International Conference on Spoken Language Translation (IWSLT 2023)",
month = jul,
year = "2023",
address = "Toronto, Canada (in-person and online)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.iwslt-1.37",
doi = "10.18653/v1/2023.iwslt-1.37",
pages = "389--396",
abstract = "In this paper, we describe our submission to the Simultaneous Track at IWSLT 2023. This year, we continue with the successful setup from the last year, however, we adopt the latest methods that further improve the translation quality. Additionally, we propose a novel online policy for attentional encoder-decoder models. The policy prevents the model to generate translation beyond the current speech input by using an auxiliary CTC output layer. We show that the proposed simultaneous policy can be applied to both streaming blockwise models and offline encoder-decoder models. We observe significant improvements in quality (up to 1.1 BLEU) and the computational footprint (up to 45{\%} relative RTF).",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="polak-etal-2023-towards">
<titleInfo>
<title>Towards Efficient Simultaneous Speech Translation: CUNI-KIT System for Simultaneous Track at IWSLT 2023</title>
</titleInfo>
<name type="personal">
<namePart type="given">Peter</namePart>
<namePart type="family">Polák</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Danni</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ngoc-Quan</namePart>
<namePart type="family">Pham</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Niehues</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexander</namePart>
<namePart type="family">Waibel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ondřej</namePart>
<namePart type="family">Bojar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 20th International Conference on Spoken Language Translation (IWSLT 2023)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Elizabeth</namePart>
<namePart type="family">Salesky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcello</namePart>
<namePart type="family">Federico</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marine</namePart>
<namePart type="family">Carpuat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada (in-person and online)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we describe our submission to the Simultaneous Track at IWSLT 2023. This year, we continue with the successful setup from the last year, however, we adopt the latest methods that further improve the translation quality. Additionally, we propose a novel online policy for attentional encoder-decoder models. The policy prevents the model to generate translation beyond the current speech input by using an auxiliary CTC output layer. We show that the proposed simultaneous policy can be applied to both streaming blockwise models and offline encoder-decoder models. We observe significant improvements in quality (up to 1.1 BLEU) and the computational footprint (up to 45% relative RTF).</abstract>
<identifier type="citekey">polak-etal-2023-towards</identifier>
<identifier type="doi">10.18653/v1/2023.iwslt-1.37</identifier>
<location>
<url>https://aclanthology.org/2023.iwslt-1.37</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>389</start>
<end>396</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Towards Efficient Simultaneous Speech Translation: CUNI-KIT System for Simultaneous Track at IWSLT 2023
%A Polák, Peter
%A Liu, Danni
%A Pham, Ngoc-Quan
%A Niehues, Jan
%A Waibel, Alexander
%A Bojar, Ondřej
%Y Salesky, Elizabeth
%Y Federico, Marcello
%Y Carpuat, Marine
%S Proceedings of the 20th International Conference on Spoken Language Translation (IWSLT 2023)
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada (in-person and online)
%F polak-etal-2023-towards
%X In this paper, we describe our submission to the Simultaneous Track at IWSLT 2023. This year, we continue with the successful setup from the last year, however, we adopt the latest methods that further improve the translation quality. Additionally, we propose a novel online policy for attentional encoder-decoder models. The policy prevents the model to generate translation beyond the current speech input by using an auxiliary CTC output layer. We show that the proposed simultaneous policy can be applied to both streaming blockwise models and offline encoder-decoder models. We observe significant improvements in quality (up to 1.1 BLEU) and the computational footprint (up to 45% relative RTF).
%R 10.18653/v1/2023.iwslt-1.37
%U https://aclanthology.org/2023.iwslt-1.37
%U https://doi.org/10.18653/v1/2023.iwslt-1.37
%P 389-396
Markdown (Informal)
[Towards Efficient Simultaneous Speech Translation: CUNI-KIT System for Simultaneous Track at IWSLT 2023](https://aclanthology.org/2023.iwslt-1.37) (Polák et al., IWSLT 2023)
ACL