% Conference paper in the IWSLT 2016 proceedings (ACL Anthology 2016.iwslt-1.10).
% `month` concatenates the predefined `dec` macro with the conference days.
@inproceedings{cho-etal-2016-multilingual,
    title = {Multilingual Disfluency Removal using {NMT}},
    author = {Cho, Eunah and
      Niehues, Jan and
      Ha, Thanh-Le and
      Waibel, Alex},
    editor = {Cettolo, Mauro and
      Niehues, Jan and
      St{\"u}ker, Sebastian and
      Bentivogli, Luisa and
      Cattoni, Rolando and
      Federico, Marcello},
    booktitle = {Proceedings of the 13th International Conference on Spoken Language Translation},
    month = dec # " 8--9",
    year = {2016},
    address = {Seattle, Washington},
    publisher = {International Workshop on Spoken Language Translation},
    url = {https://aclanthology.org/2016.iwslt-1.10},
    abstract = {In this paper, we investigate a multilingual approach for speech disfluency removal. A major challenge of this task comes from the costly nature of disfluency annotation. Motivated by the fact that speech disfluencies are commonly observed throughout different languages, we investigate the potential of multilingual disfluency modeling. We suggest that learning a joint representation of the disfluencies in multiple languages can be a promising solution to the data sparsity issue. In this work, we utilize a multilingual neural machine translation system, where a disfluent speech transcript is directly transformed into a cleaned up text. Disfluency removal experiments on English and German speech transcripts show that multilingual disfluency modeling outperforms the single language systems. In a following experiment, we show that the improvements are also observed in a downstream application using the disfluency-removed transcripts as input.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="cho-etal-2016-multilingual">
<titleInfo>
<title>Multilingual Disfluency Removal using NMT</title>
</titleInfo>
<name type="personal">
<namePart type="given">Eunah</namePart>
<namePart type="family">Cho</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Niehues</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thanh-Le</namePart>
<namePart type="family">Ha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alex</namePart>
<namePart type="family">Waibel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2016-dec 8-9</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 13th International Conference on Spoken Language Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mauro</namePart>
<namePart type="family">Cettolo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Niehues</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastian</namePart>
<namePart type="family">Stüker</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luisa</namePart>
<namePart type="family">Bentivogli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rolando</namePart>
<namePart type="family">Cattoni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcello</namePart>
<namePart type="family">Federico</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>International Workshop on Spoken Language Translation</publisher>
<place>
<placeTerm type="text">Seattle, Washington</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we investigate a multilingual approach for speech disfluency removal. A major challenge of this task comes from the costly nature of disfluency annotation. Motivated by the fact that speech disfluencies are commonly observed throughout different languages, we investigate the potential of multilingual disfluency modeling. We suggest that learning a joint representation of the disfluencies in multiple languages can be a promising solution to the data sparsity issue. In this work, we utilize a multilingual neural machine translation system, where a disfluent speech transcript is directly transformed into a cleaned up text. Disfluency removal experiments on English and German speech transcripts show that multilingual disfluency modeling outperforms the single language systems. In a following experiment, we show that the improvements are also observed in a downstream application using the disfluency-removed transcripts as input.</abstract>
<identifier type="citekey">cho-etal-2016-multilingual</identifier>
<location>
<url>https://aclanthology.org/2016.iwslt-1.10</url>
</location>
<part>
<date>2016-dec 8-9</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Multilingual Disfluency Removal using NMT
%A Cho, Eunah
%A Niehues, Jan
%A Ha, Thanh-Le
%A Waibel, Alex
%Y Cettolo, Mauro
%Y Niehues, Jan
%Y Stüker, Sebastian
%Y Bentivogli, Luisa
%Y Cattoni, Rolando
%Y Federico, Marcello
%S Proceedings of the 13th International Conference on Spoken Language Translation
%D 2016
%8 dec 8-9
%I International Workshop on Spoken Language Translation
%C Seattle, Washington
%F cho-etal-2016-multilingual
%X In this paper, we investigate a multilingual approach for speech disfluency removal. A major challenge of this task comes from the costly nature of disfluency annotation. Motivated by the fact that speech disfluencies are commonly observed throughout different languages, we investigate the potential of multilingual disfluency modeling. We suggest that learning a joint representation of the disfluencies in multiple languages can be a promising solution to the data sparsity issue. In this work, we utilize a multilingual neural machine translation system, where a disfluent speech transcript is directly transformed into a cleaned up text. Disfluency removal experiments on English and German speech transcripts show that multilingual disfluency modeling outperforms the single language systems. In a following experiment, we show that the improvements are also observed in a downstream application using the disfluency-removed transcripts as input.
%U https://aclanthology.org/2016.iwslt-1.10
Markdown (Informal)
[Multilingual Disfluency Removal using NMT](https://aclanthology.org/2016.iwslt-1.10) (Cho et al., IWSLT 2016)
ACL
- Eunah Cho, Jan Niehues, Thanh-Le Ha, and Alex Waibel. 2016. Multilingual Disfluency Removal using NMT. In Proceedings of the 13th International Conference on Spoken Language Translation, Seattle, Washington. International Workshop on Spoken Language Translation.