@inproceedings{shang-etal-2023-hw,
title = "The {HW}-{TSC}{'}s Simultaneous Speech-to-Speech Translation System for {IWSLT} 2023 Evaluation",
author = "Shang, Hengchao and
Rao, Zhiqiang and
Li, Zongyao and
Wu, Zhanglin and
Guo, Jiaxin and
Wang, Minghan and
Wei, Daimeng and
Li, Shaojun and
Yu, Zhengzhe and
Chen, Xiaoyu and
Lei, Lizhi and
Yang, Hao",
editor = "Salesky, Elizabeth and
Federico, Marcello and
Carpuat, Marine",
booktitle = "Proceedings of the 20th International Conference on Spoken Language Translation (IWSLT 2023)",
month = jul,
year = "2023",
address = "Toronto, Canada (in-person and online)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.iwslt-1.36",
doi = "10.18653/v1/2023.iwslt-1.36",
pages = "383--388",
abstract = "In this paper, we present our submission to the IWSLT 2023 Simultaneous Speech-to-Speech Translation competition. Our participation involves three language directions: English-German, English-Chinese, and English-Japanese. Our solution is a cascaded incremental decoding system, consisting of an ASR model, an MT model, and a TTS model. By adopting the strategies used in the Speech-to-Text track, we have managed to generate a more confident target text for each audio segment input, which can guide the next MT incremental decoding process. Additionally, we have integrated the TTS model to seamlessly reproduce audio files from the translation hypothesis. To enhance the effectiveness of our experiment, we have utilized a range of methods to reduce error conditions in the TTS input text and improve the smoothness of the TTS output audio.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="shang-etal-2023-hw">
<titleInfo>
<title>The HW-TSC’s Simultaneous Speech-to-Speech Translation System for IWSLT 2023 Evaluation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hengchao</namePart>
<namePart type="family">Shang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhiqiang</namePart>
<namePart type="family">Rao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zongyao</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhanglin</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiaxin</namePart>
<namePart type="family">Guo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Minghan</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daimeng</namePart>
<namePart type="family">Wei</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shaojun</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhengzhe</namePart>
<namePart type="family">Yu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaoyu</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lizhi</namePart>
<namePart type="family">Lei</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hao</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 20th International Conference on Spoken Language Translation (IWSLT 2023)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Elizabeth</namePart>
<namePart type="family">Salesky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcello</namePart>
<namePart type="family">Federico</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marine</namePart>
<namePart type="family">Carpuat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada (in-person and online)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we present our submission to the IWSLT 2023 Simultaneous Speech-to-Speech Translation competition. Our participation involves three language directions: English-German, English-Chinese, and English-Japanese. Our solution is a cascaded incremental decoding system, consisting of an ASR model, an MT model, and a TTS model. By adopting the strategies used in the Speech-to-Text track, we have managed to generate a more confident target text for each audio segment input, which can guide the next MT incremental decoding process. Additionally, we have integrated the TTS model to seamlessly reproduce audio files from the translation hypothesis. To enhance the effectiveness of our experiment, we have utilized a range of methods to reduce error conditions in the TTS input text and improve the smoothness of the TTS output audio.</abstract>
<identifier type="citekey">shang-etal-2023-hw</identifier>
<identifier type="doi">10.18653/v1/2023.iwslt-1.36</identifier>
<location>
<url>https://aclanthology.org/2023.iwslt-1.36</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>383</start>
<end>388</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T The HW-TSC’s Simultaneous Speech-to-Speech Translation System for IWSLT 2023 Evaluation
%A Shang, Hengchao
%A Rao, Zhiqiang
%A Li, Zongyao
%A Wu, Zhanglin
%A Guo, Jiaxin
%A Wang, Minghan
%A Wei, Daimeng
%A Li, Shaojun
%A Yu, Zhengzhe
%A Chen, Xiaoyu
%A Lei, Lizhi
%A Yang, Hao
%Y Salesky, Elizabeth
%Y Federico, Marcello
%Y Carpuat, Marine
%S Proceedings of the 20th International Conference on Spoken Language Translation (IWSLT 2023)
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada (in-person and online)
%F shang-etal-2023-hw
%X In this paper, we present our submission to the IWSLT 2023 Simultaneous Speech-to-Speech Translation competition. Our participation involves three language directions: English-German, English-Chinese, and English-Japanese. Our solution is a cascaded incremental decoding system, consisting of an ASR model, an MT model, and a TTS model. By adopting the strategies used in the Speech-to-Text track, we have managed to generate a more confident target text for each audio segment input, which can guide the next MT incremental decoding process. Additionally, we have integrated the TTS model to seamlessly reproduce audio files from the translation hypothesis. To enhance the effectiveness of our experiment, we have utilized a range of methods to reduce error conditions in the TTS input text and improve the smoothness of the TTS output audio.
%R 10.18653/v1/2023.iwslt-1.36
%U https://aclanthology.org/2023.iwslt-1.36
%U https://doi.org/10.18653/v1/2023.iwslt-1.36
%P 383-388
Markdown (Informal)

[The HW-TSC’s Simultaneous Speech-to-Speech Translation System for IWSLT 2023 Evaluation](https://aclanthology.org/2023.iwslt-1.36) (Shang et al., IWSLT 2023)

ACL

Hengchao Shang, Zhiqiang Rao, Zongyao Li, Zhanglin Wu, Jiaxin Guo, Minghan Wang, Daimeng Wei, Shaojun Li, Zhengzhe Yu, Xiaoyu Chen, Lizhi Lei, and Hao Yang. 2023. The HW-TSC’s Simultaneous Speech-to-Speech Translation System for IWSLT 2023 Evaluation. In Proceedings of the 20th International Conference on Spoken Language Translation (IWSLT 2023), pages 383–388, Toronto, Canada (in-person and online). Association for Computational Linguistics.
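As a rough illustration of the cascaded incremental decoding pipeline the abstract describes (ASR → MT → TTS, with each audio segment extending a committed target prefix that is then synthesized), here is a minimal Python sketch. The function names and interfaces (`asr_transcribe`, `mt_translate`, `tts_synthesize`) are hypothetical stand-ins, not the authors' implementation.

```python
# Hypothetical sketch of a cascaded incremental S2ST loop (ASR -> MT -> TTS).
# The three model functions below are placeholder stubs, not the paper's APIs.
from typing import Iterable, List


def asr_transcribe(audio_segment: bytes, history: str) -> str:
    """Stub ASR: returns the transcript of all audio seen so far."""
    return (history + " <asr-output>").strip()  # placeholder


def mt_translate(source: str, committed: List[str]) -> List[str]:
    """Stub MT: incrementally extends the committed target prefix."""
    return committed + ["<new-target-token>"]  # placeholder


def tts_synthesize(text: str) -> bytes:
    """Stub TTS: returns synthesized audio for the given text."""
    return b"<audio>"  # placeholder


def simultaneous_s2st(audio_stream: Iterable[bytes]) -> Iterable[bytes]:
    """Cascade: each incoming audio chunk updates the running transcript,
    the MT step extends a committed target prefix, and only the newly
    committed tokens are passed to TTS, so earlier output audio is never
    revised (one way to keep the synthesized speech smooth)."""
    transcript = ""
    committed: List[str] = []
    for chunk in audio_stream:
        transcript = asr_transcribe(chunk, transcript)
        new_committed = mt_translate(transcript, committed)
        fresh = new_committed[len(committed):]  # tokens committed this step
        committed = new_committed
        if fresh:
            yield tts_synthesize(" ".join(fresh))


# Example: run over three dummy audio chunks.
for audio_out in simultaneous_s2st([b"chunk1", b"chunk2", b"chunk3"]):
    print(len(audio_out), "bytes of synthesized audio")
```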