@inproceedings{gaido-etal-2021-ctc,
title = "{CTC}-based Compression for Direct Speech Translation",
author = "Gaido, Marco and
Cettolo, Mauro and
Negri, Matteo and
Turchi, Marco",
editor = "Merlo, Paola and
Tiedemann, Jorg and
Tsarfaty, Reut",
booktitle = "Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume",
month = apr,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.eacl-main.57",
doi = "10.18653/v1/2021.eacl-main.57",
pages = "690--696",
abstract = "Previous studies demonstrated that a dynamic phone-informed compression of the input audio is beneficial for speech translation (ST). However, they required a dedicated model for phone recognition and did not test this solution for direct ST, in which a single model translates the input audio into the target language without intermediate representations. In this work, we propose the first method able to perform a dynamic compression of the input in direct ST models. In particular, we exploit the Connectionist Temporal Classification (CTC) to compress the input sequence according to its phonetic characteristics. Our experiments demonstrate that our solution brings a 1.3-1.5 BLEU improvement over a strong baseline on two language pairs (English-Italian and English-German), contextually reducing the memory footprint by more than 10{\%}.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="gaido-etal-2021-ctc">
<titleInfo>
<title>CTC-based Compression for Direct Speech Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marco</namePart>
<namePart type="family">Gaido</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mauro</namePart>
<namePart type="family">Cettolo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matteo</namePart>
<namePart type="family">Negri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marco</namePart>
<namePart type="family">Turchi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume</title>
</titleInfo>
<name type="personal">
<namePart type="given">Paola</namePart>
<namePart type="family">Merlo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jorg</namePart>
<namePart type="family">Tiedemann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Reut</namePart>
<namePart type="family">Tsarfaty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Previous studies demonstrated that a dynamic phone-informed compression of the input audio is beneficial for speech translation (ST). However, they required a dedicated model for phone recognition and did not test this solution for direct ST, in which a single model translates the input audio into the target language without intermediate representations. In this work, we propose the first method able to perform a dynamic compression of the input in direct ST models. In particular, we exploit the Connectionist Temporal Classification (CTC) to compress the input sequence according to its phonetic characteristics. Our experiments demonstrate that our solution brings a 1.3-1.5 BLEU improvement over a strong baseline on two language pairs (English-Italian and English-German), contextually reducing the memory footprint by more than 10%.</abstract>
<identifier type="citekey">gaido-etal-2021-ctc</identifier>
<identifier type="doi">10.18653/v1/2021.eacl-main.57</identifier>
<location>
<url>https://aclanthology.org/2021.eacl-main.57</url>
</location>
<part>
<date>2021-04</date>
<extent unit="page">
<start>690</start>
<end>696</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T CTC-based Compression for Direct Speech Translation
%A Gaido, Marco
%A Cettolo, Mauro
%A Negri, Matteo
%A Turchi, Marco
%Y Merlo, Paola
%Y Tiedemann, Jorg
%Y Tsarfaty, Reut
%S Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume
%D 2021
%8 April
%I Association for Computational Linguistics
%C Online
%F gaido-etal-2021-ctc
%X Previous studies demonstrated that a dynamic phone-informed compression of the input audio is beneficial for speech translation (ST). However, they required a dedicated model for phone recognition and did not test this solution for direct ST, in which a single model translates the input audio into the target language without intermediate representations. In this work, we propose the first method able to perform a dynamic compression of the input in direct ST models. In particular, we exploit the Connectionist Temporal Classification (CTC) to compress the input sequence according to its phonetic characteristics. Our experiments demonstrate that our solution brings a 1.3-1.5 BLEU improvement over a strong baseline on two language pairs (English-Italian and English-German), contextually reducing the memory footprint by more than 10%.
%R 10.18653/v1/2021.eacl-main.57
%U https://aclanthology.org/2021.eacl-main.57
%U https://doi.org/10.18653/v1/2021.eacl-main.57
%P 690-696
Markdown (Informal)
[CTC-based Compression for Direct Speech Translation](https://aclanthology.org/2021.eacl-main.57) (Gaido et al., EACL 2021)
ACL
- Marco Gaido, Mauro Cettolo, Matteo Negri, and Marco Turchi. 2021. CTC-based Compression for Direct Speech Translation. In Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume, pages 690–696, Online. Association for Computational Linguistics.