% Conference paper entry (system description) from the IWSLT 2023 proceedings.
@inproceedings{hussein-etal-2023-jhu,
    author    = {Hussein, Amir and
                 Xiao, Cihan and
                 Verma, Neha and
                 Thebaud, Thomas and
                 Wiesner, Matthew and
                 Khudanpur, Sanjeev},
    title     = {{JHU} {IWSLT} 2023 Dialect Speech Translation System Description},
    editor    = {Salesky, Elizabeth and
                 Federico, Marcello and
                 Carpuat, Marine},
    booktitle = {Proceedings of the 20th International Conference on Spoken Language Translation (IWSLT 2023)},
    publisher = {Association for Computational Linguistics},
    address   = {Toronto, Canada (in-person and online)},
    month     = jul,
    year      = {2023},
    pages     = {283--290},
    doi       = {10.18653/v1/2023.iwslt-1.26},
    url       = {https://aclanthology.org/2023.iwslt-1.26},
    abstract  = {This paper presents JHU{'}s submissions to the IWSLT 2023 dialectal and low-resource track of Tunisian Arabic to English speech translation. The Tunisian dialect lacks formal orthography and abundant training data, making it challenging to develop effective speech translation (ST) systems. To address these challenges, we explore the integration of large pre-trained machine translation (MT) models, such as mBART and NLLB-200 in both end-to-end (E2E) and cascaded speech translation (ST) systems. We also improve the performance of automatic speech recognition (ASR) through the use of pseudo-labeling data augmentation and channel matching on telephone data. Finally, we combine our E2E and cascaded ST systems with Minimum Bayes-Risk decoding. Our combined system achieves a BLEU score of 21.6 and 19.1 on test2 and test3, respectively.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record for the paper with citekey hussein-etal-2023-jhu. -->
  <mods ID="hussein-etal-2023-jhu">
    <titleInfo>
      <title>JHU IWSLT 2023 Dialect Speech Translation System Description</title>
    </titleInfo>
    <!-- Authors, in the order they are listed for the paper. -->
    <name type="personal">
      <namePart type="given">Amir</namePart>
      <namePart type="family">Hussein</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Cihan</namePart>
      <namePart type="family">Xiao</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Neha</namePart>
      <namePart type="family">Verma</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Thomas</namePart>
      <namePart type="family">Thebaud</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Matthew</namePart>
      <namePart type="family">Wiesner</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sanjeev</namePart>
      <namePart type="family">Khudanpur</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2023-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors and publisher details. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 20th International Conference on Spoken Language Translation (IWSLT 2023)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Elizabeth</namePart>
        <namePart type="family">Salesky</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Marcello</namePart>
        <namePart type="family">Federico</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Marine</namePart>
        <namePart type="family">Carpuat</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Toronto, Canada (in-person and online)</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>This paper presents JHU’s submissions to the IWSLT 2023 dialectal and low-resource track of Tunisian Arabic to English speech translation. The Tunisian dialect lacks formal orthography and abundant training data, making it challenging to develop effective speech translation (ST) systems. To address these challenges, we explore the integration of large pre-trained machine translation (MT) models, such as mBART and NLLB-200 in both end-to-end (E2E) and cascaded speech translation (ST) systems. We also improve the performance of automatic speech recognition (ASR) through the use of pseudo-labeling data augmentation and channel matching on telephone data. Finally, we combine our E2E and cascaded ST systems with Minimum Bayes-Risk decoding. Our combined system achieves a BLEU score of 21.6 and 19.1 on test2 and test3, respectively.</abstract>
    <!-- Identifiers and canonical landing page. -->
    <identifier type="citekey">hussein-etal-2023-jhu</identifier>
    <identifier type="doi">10.18653/v1/2023.iwslt-1.26</identifier>
    <location>
      <url>https://aclanthology.org/2023.iwslt-1.26</url>
    </location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2023-07</date>
      <extent unit="page">
        <start>283</start>
        <end>290</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T JHU IWSLT 2023 Dialect Speech Translation System Description
%A Hussein, Amir
%A Xiao, Cihan
%A Verma, Neha
%A Thebaud, Thomas
%A Wiesner, Matthew
%A Khudanpur, Sanjeev
%Y Salesky, Elizabeth
%Y Federico, Marcello
%Y Carpuat, Marine
%S Proceedings of the 20th International Conference on Spoken Language Translation (IWSLT 2023)
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada (in-person and online)
%F hussein-etal-2023-jhu
%X This paper presents JHU’s submissions to the IWSLT 2023 dialectal and low-resource track of Tunisian Arabic to English speech translation. The Tunisian dialect lacks formal orthography and abundant training data, making it challenging to develop effective speech translation (ST) systems. To address these challenges, we explore the integration of large pre-trained machine translation (MT) models, such as mBART and NLLB-200 in both end-to-end (E2E) and cascaded speech translation (ST) systems. We also improve the performance of automatic speech recognition (ASR) through the use of pseudo-labeling data augmentation and channel matching on telephone data. Finally, we combine our E2E and cascaded ST systems with Minimum Bayes-Risk decoding. Our combined system achieves a BLEU score of 21.6 and 19.1 on test2 and test3, respectively.
%R 10.18653/v1/2023.iwslt-1.26
%U https://aclanthology.org/2023.iwslt-1.26
%U https://doi.org/10.18653/v1/2023.iwslt-1.26
%P 283-290
Markdown (Informal)
[JHU IWSLT 2023 Dialect Speech Translation System Description](https://aclanthology.org/2023.iwslt-1.26) (Hussein et al., IWSLT 2023)
ACL
- Amir Hussein, Cihan Xiao, Neha Verma, Thomas Thebaud, Matthew Wiesner, and Sanjeev Khudanpur. 2023. JHU IWSLT 2023 Dialect Speech Translation System Description. In Proceedings of the 20th International Conference on Spoken Language Translation (IWSLT 2023), pages 283–290, Toronto, Canada (in-person and online). Association for Computational Linguistics.