@inproceedings{fukuda-etal-2023-naist,
title = "{NAIST} Simultaneous Speech-to-speech Translation System for {IWSLT} 2023",
author = "Fukuda, Ryo and
Nishikawa, Yuta and
Kano, Yasumasa and
Ko, Yuka and
Yanagita, Tomoya and
Doi, Kosuke and
Makinae, Mana and
Sakti, Sakriani and
Sudoh, Katsuhito and
Nakamura, Satoshi",
editor = "Salesky, Elizabeth and
Federico, Marcello and
Carpuat, Marine",
booktitle = "Proceedings of the 20th International Conference on Spoken Language Translation (IWSLT 2023)",
month = jul,
year = "2023",
address = "Toronto, Canada (in-person and online)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.iwslt-1.31",
doi = "10.18653/v1/2023.iwslt-1.31",
pages = "330--340",
abstract = "This paper describes NAIST{'}s submission to the IWSLT 2023 Simultaneous Speech Translation task: English-to-German, Japanese, Chinese speech-to-text translation and English-to-Japanese speech-to-speech translation. Our speech-to-text system uses an end-to-end multilingual speech translation model based on large-scale pre-trained speech and text models. We add Inter-connections into the model to incorporate the outputs from intermediate layers of the pre-trained speech model and augment prefix-to-prefix text data using Bilingual Prefix Alignment to enhance the simultaneity of the offline speech translation model. Our speech-to-speech system employs an incremental text-to-speech module that consists of a Japanese pronunciation estimation model, an acoustic model, and a neural vocoder.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="fukuda-etal-2023-naist">
<titleInfo>
<title>NAIST Simultaneous Speech-to-speech Translation System for IWSLT 2023</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ryo</namePart>
<namePart type="family">Fukuda</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuta</namePart>
<namePart type="family">Nishikawa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yasumasa</namePart>
<namePart type="family">Kano</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuka</namePart>
<namePart type="family">Ko</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tomoya</namePart>
<namePart type="family">Yanagita</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kosuke</namePart>
<namePart type="family">Doi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mana</namePart>
<namePart type="family">Makinae</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Katsuhito</namePart>
<namePart type="family">Sudoh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Satoshi</namePart>
<namePart type="family">Nakamura</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 20th International Conference on Spoken Language Translation (IWSLT 2023)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Elizabeth</namePart>
<namePart type="family">Salesky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcello</namePart>
<namePart type="family">Federico</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marine</namePart>
<namePart type="family">Carpuat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada (in-person and online)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper describes NAIST’s submission to the IWSLT 2023 Simultaneous Speech Translation task: English-to-German, Japanese, Chinese speech-to-text translation and English-to-Japanese speech-to-speech translation. Our speech-to-text system uses an end-to-end multilingual speech translation model based on large-scale pre-trained speech and text models. We add Inter-connections into the model to incorporate the outputs from intermediate layers of the pre-trained speech model and augment prefix-to-prefix text data using Bilingual Prefix Alignment to enhance the simultaneity of the offline speech translation model. Our speech-to-speech system employs an incremental text-to-speech module that consists of a Japanese pronunciation estimation model, an acoustic model, and a neural vocoder.</abstract>
<identifier type="citekey">fukuda-etal-2023-naist</identifier>
<identifier type="doi">10.18653/v1/2023.iwslt-1.31</identifier>
<location>
<url>https://aclanthology.org/2023.iwslt-1.31</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>330</start>
<end>340</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T NAIST Simultaneous Speech-to-speech Translation System for IWSLT 2023
%A Fukuda, Ryo
%A Nishikawa, Yuta
%A Kano, Yasumasa
%A Ko, Yuka
%A Yanagita, Tomoya
%A Doi, Kosuke
%A Makinae, Mana
%A Sakti, Sakriani
%A Sudoh, Katsuhito
%A Nakamura, Satoshi
%Y Salesky, Elizabeth
%Y Federico, Marcello
%Y Carpuat, Marine
%S Proceedings of the 20th International Conference on Spoken Language Translation (IWSLT 2023)
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada (in-person and online)
%F fukuda-etal-2023-naist
%X This paper describes NAIST’s submission to the IWSLT 2023 Simultaneous Speech Translation task: English-to-German, Japanese, Chinese speech-to-text translation and English-to-Japanese speech-to-speech translation. Our speech-to-text system uses an end-to-end multilingual speech translation model based on large-scale pre-trained speech and text models. We add Inter-connections into the model to incorporate the outputs from intermediate layers of the pre-trained speech model and augment prefix-to-prefix text data using Bilingual Prefix Alignment to enhance the simultaneity of the offline speech translation model. Our speech-to-speech system employs an incremental text-to-speech module that consists of a Japanese pronunciation estimation model, an acoustic model, and a neural vocoder.
%R 10.18653/v1/2023.iwslt-1.31
%U https://aclanthology.org/2023.iwslt-1.31
%U https://doi.org/10.18653/v1/2023.iwslt-1.31
%P 330-340
Markdown (Informal)
[NAIST Simultaneous Speech-to-speech Translation System for IWSLT 2023](https://aclanthology.org/2023.iwslt-1.31) (Fukuda et al., IWSLT 2023)
ACL
- Ryo Fukuda, Yuta Nishikawa, Yasumasa Kano, Yuka Ko, Tomoya Yanagita, Kosuke Doi, Mana Makinae, Sakriani Sakti, Katsuhito Sudoh, and Satoshi Nakamura. 2023. NAIST Simultaneous Speech-to-speech Translation System for IWSLT 2023. In Proceedings of the 20th International Conference on Spoken Language Translation (IWSLT 2023), pages 330–340, Toronto, Canada (in-person and online). Association for Computational Linguistics.