@inproceedings{salesky-etal-2019-fluent,
title = "Fluent Translations from Disfluent Speech in End-to-End Speech Translation",
author = "Salesky, Elizabeth and
Sperber, Matthias and
Waibel, Alexander",
editor = "Burstein, Jill and
Doran, Christy and
Solorio, Thamar",
booktitle = "Proceedings of the 2019 Conference of the North {A}merican Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)",
month = jun,
year = "2019",
address = "Minneapolis, Minnesota",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/N19-1285/",
doi = "10.18653/v1/N19-1285",
pages = "2786--2792",
abstract = "Spoken language translation applications for speech suffer due to conversational speech phenomena, particularly the presence of disfluencies. With the rise of end-to-end speech translation models, processing steps such as disfluency removal that were previously an intermediate step between speech recognition and machine translation need to be incorporated into model architectures. We use a sequence-to-sequence model to translate from noisy, disfluent speech to fluent text with disfluencies removed using the recently collected {\textquoteleft}copy-edited' references for the Fisher Spanish-English dataset. We are able to directly generate fluent translations and introduce considerations about how to evaluate success on this task. This work provides a baseline for a new task, implicitly removing disfluencies in end-to-end translation of conversational speech."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="salesky-etal-2019-fluent">
<titleInfo>
<title>Fluent Translations from Disfluent Speech in End-to-End Speech Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Elizabeth</namePart>
<namePart type="family">Salesky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matthias</namePart>
<namePart type="family">Sperber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexander</namePart>
<namePart type="family">Waibel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jill</namePart>
<namePart type="family">Burstein</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christy</namePart>
<namePart type="family">Doran</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thamar</namePart>
<namePart type="family">Solorio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Minneapolis, Minnesota</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Spoken language translation applications for speech suffer due to conversational speech phenomena, particularly the presence of disfluencies. With the rise of end-to-end speech translation models, processing steps such as disfluency removal that were previously an intermediate step between speech recognition and machine translation need to be incorporated into model architectures. We use a sequence-to-sequence model to translate from noisy, disfluent speech to fluent text with disfluencies removed using the recently collected ‘copy-edited’ references for the Fisher Spanish-English dataset. We are able to directly generate fluent translations and introduce considerations about how to evaluate success on this task. This work provides a baseline for a new task, implicitly removing disfluencies in end-to-end translation of conversational speech.</abstract>
<identifier type="citekey">salesky-etal-2019-fluent</identifier>
<identifier type="doi">10.18653/v1/N19-1285</identifier>
<location>
<url>https://aclanthology.org/N19-1285/</url>
</location>
<part>
<date>2019-06</date>
<extent unit="page">
<start>2786</start>
<end>2792</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Fluent Translations from Disfluent Speech in End-to-End Speech Translation
%A Salesky, Elizabeth
%A Sperber, Matthias
%A Waibel, Alexander
%Y Burstein, Jill
%Y Doran, Christy
%Y Solorio, Thamar
%S Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)
%D 2019
%8 June
%I Association for Computational Linguistics
%C Minneapolis, Minnesota
%F salesky-etal-2019-fluent
%X Spoken language translation applications for speech suffer due to conversational speech phenomena, particularly the presence of disfluencies. With the rise of end-to-end speech translation models, processing steps such as disfluency removal that were previously an intermediate step between speech recognition and machine translation need to be incorporated into model architectures. We use a sequence-to-sequence model to translate from noisy, disfluent speech to fluent text with disfluencies removed using the recently collected ‘copy-edited’ references for the Fisher Spanish-English dataset. We are able to directly generate fluent translations and introduce considerations about how to evaluate success on this task. This work provides a baseline for a new task, implicitly removing disfluencies in end-to-end translation of conversational speech.
%R 10.18653/v1/N19-1285
%U https://aclanthology.org/N19-1285/
%U https://doi.org/10.18653/v1/N19-1285
%P 2786-2792
Markdown (Informal)
[Fluent Translations from Disfluent Speech in End-to-End Speech Translation](https://aclanthology.org/N19-1285/) (Salesky et al., NAACL 2019)
ACL
- Elizabeth Salesky, Matthias Sperber, and Alexander Waibel. 2019. Fluent Translations from Disfluent Speech in End-to-End Speech Translation. In Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers), pages 2786–2792, Minneapolis, Minnesota. Association for Computational Linguistics.