@inproceedings{hrinchuk-etal-2023-nvidia,
title = "{NVIDIA} {N}e{M}o Offline Speech Translation Systems for {IWSLT} 2023",
author = "Hrinchuk, Oleksii and
Bataev, Vladimir and
Bakhturina, Evelina and
Ginsburg, Boris",
editor = "Salesky, Elizabeth and
Federico, Marcello and
Carpuat, Marine",
booktitle = "Proceedings of the 20th International Conference on Spoken Language Translation (IWSLT 2023)",
month = jul,
year = "2023",
address = "Toronto, Canada (in-person and online)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.iwslt-1.42",
doi = "10.18653/v1/2023.iwslt-1.42",
pages = "442--448",
abstract = "This paper provides an overview of NVIDIA NeMo{'}s speech translation systems for the IWSLT 2023 Offline Speech Translation Task. This year, we focused on end-to-end system which capitalizes on pre-trained models and synthetic data to mitigate the problem of direct speech translation data scarcity. When trained on IWSLT 2022 constrained data, our best En-{\textgreater}De end-to-end model achieves the average score of 31 BLEU on 7 test sets from IWSLT 2010-2020 which improves over our last year cascade (28.4) and end-to-end (25.7) submissions. When trained on IWSLT 2023 constrained data, the average score drops to 29.5 BLEU.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hrinchuk-etal-2023-nvidia">
<titleInfo>
<title>NVIDIA NeMo Offline Speech Translation Systems for IWSLT 2023</title>
</titleInfo>
<name type="personal">
<namePart type="given">Oleksii</namePart>
<namePart type="family">Hrinchuk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vladimir</namePart>
<namePart type="family">Bataev</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Evelina</namePart>
<namePart type="family">Bakhturina</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Boris</namePart>
<namePart type="family">Ginsburg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 20th International Conference on Spoken Language Translation (IWSLT 2023)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Elizabeth</namePart>
<namePart type="family">Salesky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcello</namePart>
<namePart type="family">Federico</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marine</namePart>
<namePart type="family">Carpuat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada (in-person and online)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper provides an overview of NVIDIA NeMo’s speech translation systems for the IWSLT 2023 Offline Speech Translation Task. This year, we focused on end-to-end system which capitalizes on pre-trained models and synthetic data to mitigate the problem of direct speech translation data scarcity. When trained on IWSLT 2022 constrained data, our best En-&gt;De end-to-end model achieves the average score of 31 BLEU on 7 test sets from IWSLT 2010-2020 which improves over our last year cascade (28.4) and end-to-end (25.7) submissions. When trained on IWSLT 2023 constrained data, the average score drops to 29.5 BLEU.</abstract>
<identifier type="citekey">hrinchuk-etal-2023-nvidia</identifier>
<identifier type="doi">10.18653/v1/2023.iwslt-1.42</identifier>
<location>
<url>https://aclanthology.org/2023.iwslt-1.42</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>442</start>
<end>448</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T NVIDIA NeMo Offline Speech Translation Systems for IWSLT 2023
%A Hrinchuk, Oleksii
%A Bataev, Vladimir
%A Bakhturina, Evelina
%A Ginsburg, Boris
%Y Salesky, Elizabeth
%Y Federico, Marcello
%Y Carpuat, Marine
%S Proceedings of the 20th International Conference on Spoken Language Translation (IWSLT 2023)
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada (in-person and online)
%F hrinchuk-etal-2023-nvidia
%X This paper provides an overview of NVIDIA NeMo’s speech translation systems for the IWSLT 2023 Offline Speech Translation Task. This year, we focused on end-to-end system which capitalizes on pre-trained models and synthetic data to mitigate the problem of direct speech translation data scarcity. When trained on IWSLT 2022 constrained data, our best En->De end-to-end model achieves the average score of 31 BLEU on 7 test sets from IWSLT 2010-2020 which improves over our last year cascade (28.4) and end-to-end (25.7) submissions. When trained on IWSLT 2023 constrained data, the average score drops to 29.5 BLEU.
%R 10.18653/v1/2023.iwslt-1.42
%U https://aclanthology.org/2023.iwslt-1.42
%U https://doi.org/10.18653/v1/2023.iwslt-1.42
%P 442-448
Markdown (Informal)
[NVIDIA NeMo Offline Speech Translation Systems for IWSLT 2023](https://aclanthology.org/2023.iwslt-1.42) (Hrinchuk et al., IWSLT 2023)
ACL
- Oleksii Hrinchuk, Vladimir Bataev, Evelina Bakhturina, and Boris Ginsburg. 2023. NVIDIA NeMo Offline Speech Translation Systems for IWSLT 2023. In Proceedings of the 20th International Conference on Spoken Language Translation (IWSLT 2023), pages 442–448, Toronto, Canada (in-person and online). Association for Computational Linguistics.