@inproceedings{cho-etal-2013-crf,
  title     = {{CRF}-based disfluency detection using semantic features for {German} to {English} spoken language translation},
  author    = {Cho, Eunah and
               Ha, Than-Le and
               Waibel, Alex},
  editor    = {Zhang, Joy Ying},
  booktitle = {Proceedings of the 10th International Workshop on Spoken Language Translation: Papers},
  month     = dec # " 5-6",
  year      = {2013},
  address   = {Heidelberg, Germany},
  url       = {https://aclanthology.org/2013.iwslt-papers.12},
  abstract  = {Disfluencies in speech pose severe difficulties in machine translation of spontaneous speech. This paper presents our conditional random field (CRF)-based speech disfluency detection system developed on German to improve spoken language translation performance. In order to detect speech disfluencies considering syntactics and semantics of speech utterances, we carried out a CRF-based approach using information learned from the word representation and the phrase table used for machine translation. The word representation is gained using recurrent neural networks and projected words are clustered using the k-means algorithm. Using the output from the model trained with the word representations and phrase table information, we achieve an improvement of 1.96 BLEU points on the lecture test set. By keeping or removing human-annotated disfluencies, we show an upper bound and lower bound of translation quality. In an oracle experiment we gain 3.16 BLEU points of improvement on the lecture test set, compared to the same set with all disfluencies.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="cho-etal-2013-crf">
<titleInfo>
<title>CRF-based disfluency detection using semantic features for German to English spoken language translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Eunah</namePart>
<namePart type="family">Cho</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Than-Le</namePart>
<namePart type="family">Ha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alex</namePart>
<namePart type="family">Waibel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2013-12-05/2013-12-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 10th International Workshop on Spoken Language Translation: Papers</title>
</titleInfo>
<name type="personal">
<namePart type="given">Joy</namePart>
<namePart type="given">Ying</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<place>
<placeTerm type="text">Heidelberg, Germany</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Disfluencies in speech pose severe difficulties in machine translation of spontaneous speech. This paper presents our conditional random field (CRF)-based speech disfluency detection system developed on German to improve spoken language translation performance. In order to detect speech disfluencies considering syntactics and semantics of speech utterances, we carried out a CRF-based approach using information learned from the word representation and the phrase table used for machine translation. The word representation is gained using recurrent neural networks and projected words are clustered using the k-means algorithm. Using the output from the model trained with the word representations and phrase table information, we achieve an improvement of 1.96 BLEU points on the lecture test set. By keeping or removing human-annotated disfluencies, we show an upper bound and lower bound of translation quality. In an oracle experiment we gain 3.16 BLEU points of improvement on the lecture test set, compared to the same set with all disfluencies.</abstract>
<identifier type="citekey">cho-etal-2013-crf</identifier>
<location>
<url>https://aclanthology.org/2013.iwslt-papers.12</url>
</location>
<part>
<date>2013-12-05/2013-12-06</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T CRF-based disfluency detection using semantic features for German to English spoken language translation
%A Cho, Eunah
%A Ha, Than-Le
%A Waibel, Alex
%Y Zhang, Joy Ying
%S Proceedings of the 10th International Workshop on Spoken Language Translation: Papers
%D 2013
%8 dec 5-6
%C Heidelberg, Germany
%F cho-etal-2013-crf
%X Disfluencies in speech pose severe difficulties in machine translation of spontaneous speech. This paper presents our conditional random field (CRF)-based speech disfluency detection system developed on German to improve spoken language translation performance. In order to detect speech disfluencies considering syntactics and semantics of speech utterances, we carried out a CRF-based approach using information learned from the word representation and the phrase table used for machine translation. The word representation is gained using recurrent neural networks and projected words are clustered using the k-means algorithm. Using the output from the model trained with the word representations and phrase table information, we achieve an improvement of 1.96 BLEU points on the lecture test set. By keeping or removing human-annotated disfluencies, we show an upper bound and lower bound of translation quality. In an oracle experiment we gain 3.16 BLEU points of improvement on the lecture test set, compared to the same set with all disfluencies.
%U https://aclanthology.org/2013.iwslt-papers.12
Markdown (Informal)
[CRF-based disfluency detection using semantic features for German to English spoken language translation](https://aclanthology.org/2013.iwslt-papers.12) (Cho et al., IWSLT 2013)
ACL