BibTeX
@inproceedings{shanbhogue-etal-2022-amazon,
title = "{A}mazon {A}lexa {AI}{'}s System for {IWSLT} 2022 Offline Speech Translation Shared Task",
author = "Shanbhogue, Akshaya and
Xue, Ran and
Chang, Ching-Yun and
Campbell, Sarah",
editor = "Salesky, Elizabeth and
Federico, Marcello and
Costa-juss{\`a}, Marta",
booktitle = "Proceedings of the 19th International Conference on Spoken Language Translation (IWSLT 2022)",
month = may,
year = "2022",
address = "Dublin, Ireland (in-person and online)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.iwslt-1.12",
doi = "10.18653/v1/2022.iwslt-1.12",
pages = "169--176",
abstract = "This paper describes Amazon Alexa AI{'}s submission to the IWSLT 2022 Offline Speech Translation Task. Our system is an end-to-end speech translation model that leverages pretrained models and cross modality transfer learning. We detail two improvements to the knowledge transfer schema. First, we implemented a new loss function that reduces knowledge gap between audio and text modalities in translation task effectively. Second, we investigate multiple finetuning strategies including sampling loss, language grouping and domain adaption. These strategies aims to bridge the gaps between speech and text translation tasks. We also implement a multi-stage segmentation and merging strategy that yields improvements on the unsegmented development datasets. Results show that the proposed loss function consistently improves BLEU scores on the development datasets for both English-German and multilingual models. Additionally, certain language pairs see BLEU score improvements with specific finetuning strategies.",
}
MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="shanbhogue-etal-2022-amazon">
<titleInfo>
<title>Amazon Alexa AI’s System for IWSLT 2022 Offline Speech Translation Shared Task</title>
</titleInfo>
<name type="personal">
<namePart type="given">Akshaya</namePart>
<namePart type="family">Shanbhogue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ran</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ching-Yun</namePart>
<namePart type="family">Chang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sarah</namePart>
<namePart type="family">Campbell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 19th International Conference on Spoken Language Translation (IWSLT 2022)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Elizabeth</namePart>
<namePart type="family">Salesky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcello</namePart>
<namePart type="family">Federico</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marta</namePart>
<namePart type="family">Costa-jussà</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dublin, Ireland (in-person and online)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper describes Amazon Alexa AI’s submission to the IWSLT 2022 Offline Speech Translation Task. Our system is an end-to-end speech translation model that leverages pretrained models and cross-modality transfer learning. We detail two improvements to the knowledge transfer schema. First, we implement a new loss function that effectively reduces the knowledge gap between the audio and text modalities in the translation task. Second, we investigate multiple finetuning strategies, including sampling loss, language grouping, and domain adaptation. These strategies aim to bridge the gaps between the speech and text translation tasks. We also implement a multi-stage segmentation and merging strategy that yields improvements on the unsegmented development datasets. Results show that the proposed loss function consistently improves BLEU scores on the development datasets for both English-German and multilingual models. Additionally, certain language pairs see BLEU score improvements with specific finetuning strategies.</abstract>
<identifier type="citekey">shanbhogue-etal-2022-amazon</identifier>
<identifier type="doi">10.18653/v1/2022.iwslt-1.12</identifier>
<location>
<url>https://aclanthology.org/2022.iwslt-1.12</url>
</location>
<part>
<date>2022-05</date>
<extent unit="page">
<start>169</start>
<end>176</end>
</extent>
</part>
</mods>
</modsCollection>
Endnote
%0 Conference Proceedings
%T Amazon Alexa AI’s System for IWSLT 2022 Offline Speech Translation Shared Task
%A Shanbhogue, Akshaya
%A Xue, Ran
%A Chang, Ching-Yun
%A Campbell, Sarah
%Y Salesky, Elizabeth
%Y Federico, Marcello
%Y Costa-jussà, Marta
%S Proceedings of the 19th International Conference on Spoken Language Translation (IWSLT 2022)
%D 2022
%8 May
%I Association for Computational Linguistics
%C Dublin, Ireland (in-person and online)
%F shanbhogue-etal-2022-amazon
%X This paper describes Amazon Alexa AI’s submission to the IWSLT 2022 Offline Speech Translation Task. Our system is an end-to-end speech translation model that leverages pretrained models and cross-modality transfer learning. We detail two improvements to the knowledge transfer schema. First, we implement a new loss function that effectively reduces the knowledge gap between the audio and text modalities in the translation task. Second, we investigate multiple finetuning strategies, including sampling loss, language grouping, and domain adaptation. These strategies aim to bridge the gaps between the speech and text translation tasks. We also implement a multi-stage segmentation and merging strategy that yields improvements on the unsegmented development datasets. Results show that the proposed loss function consistently improves BLEU scores on the development datasets for both English-German and multilingual models. Additionally, certain language pairs see BLEU score improvements with specific finetuning strategies.
%R 10.18653/v1/2022.iwslt-1.12
%U https://aclanthology.org/2022.iwslt-1.12
%U https://doi.org/10.18653/v1/2022.iwslt-1.12
%P 169-176
Markdown (Informal)
[Amazon Alexa AI’s System for IWSLT 2022 Offline Speech Translation Shared Task](https://aclanthology.org/2022.iwslt-1.12) (Shanbhogue et al., IWSLT 2022)
ACL
Akshaya Shanbhogue, Ran Xue, Ching-Yun Chang, and Sarah Campbell. 2022. Amazon Alexa AI’s System for IWSLT 2022 Offline Speech Translation Shared Task. In Proceedings of the 19th International Conference on Spoken Language Translation (IWSLT 2022), pages 169–176, Dublin, Ireland (in-person and online). Association for Computational Linguistics.