@inproceedings{che-etal-2016-punctuation,
title = "Punctuation Prediction for Unsegmented Transcript Based on Word Vector",
author = "Che, Xiaoyin and
Wang, Cheng and
Yang, Haojin and
Meinel, Christoph",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Declerck, Thierry and
Goggi, Sara and
Grobelnik, Marko and
Maegaard, Bente and
Mariani, Joseph and
Mazo, Helene and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Tenth International Conference on Language Resources and Evaluation ({LREC}'16)",
month = may,
year = "2016",
address = "Portoro{\v{z}}, Slovenia",
publisher = "European Language Resources Association (ELRA)",
url = "https://aclanthology.org/L16-1103",
pages = "654--658",
abstract = "In this paper we propose an approach to predict punctuation marks for unsegmented speech transcript. The approach is purely lexical, with pre-trained Word Vectors as the only input. A training model of Deep Neural Network (DNN) or Convolutional Neural Network (CNN) is applied to classify whether a punctuation mark should be inserted after the third word of a 5-words sequence and which kind of punctuation mark the inserted one should be. TED talks within IWSLT dataset are used in both training and evaluation phases. The proposed approach shows its effectiveness by achieving better result than the state-of-the-art lexical solution which works with same type of data, especially when predicting puncuation position only.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="che-etal-2016-punctuation">
<titleInfo>
<title>Punctuation Prediction for Unsegmented Transcript Based on Word Vector</title>
</titleInfo>
<name type="personal">
<namePart type="given">Xiaoyin</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cheng</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Haojin</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christoph</namePart>
<namePart type="family">Meinel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2016-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC’16)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Goggi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marko</namePart>
<namePart type="family">Grobelnik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Helene</namePart>
<namePart type="family">Mazo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Portorož, Slovenia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper we propose an approach to predict punctuation marks for unsegmented speech transcript. The approach is purely lexical, with pre-trained Word Vectors as the only input. A training model of Deep Neural Network (DNN) or Convolutional Neural Network (CNN) is applied to classify whether a punctuation mark should be inserted after the third word of a 5-words sequence and which kind of punctuation mark the inserted one should be. TED talks within IWSLT dataset are used in both training and evaluation phases. The proposed approach shows its effectiveness by achieving better result than the state-of-the-art lexical solution which works with same type of data, especially when predicting puncuation position only.</abstract>
<identifier type="citekey">che-etal-2016-punctuation</identifier>
<location>
<url>https://aclanthology.org/L16-1103</url>
</location>
<part>
<date>2016-05</date>
<extent unit="page">
<start>654</start>
<end>658</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Punctuation Prediction for Unsegmented Transcript Based on Word Vector
%A Che, Xiaoyin
%A Wang, Cheng
%A Yang, Haojin
%A Meinel, Christoph
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Declerck, Thierry
%Y Goggi, Sara
%Y Grobelnik, Marko
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Mazo, Helene
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC’16)
%D 2016
%8 May
%I European Language Resources Association (ELRA)
%C Portorož, Slovenia
%F che-etal-2016-punctuation
%X In this paper we propose an approach to predict punctuation marks for unsegmented speech transcript. The approach is purely lexical, with pre-trained Word Vectors as the only input. A training model of Deep Neural Network (DNN) or Convolutional Neural Network (CNN) is applied to classify whether a punctuation mark should be inserted after the third word of a 5-words sequence and which kind of punctuation mark the inserted one should be. TED talks within IWSLT dataset are used in both training and evaluation phases. The proposed approach shows its effectiveness by achieving better result than the state-of-the-art lexical solution which works with same type of data, especially when predicting puncuation position only.
%U https://aclanthology.org/L16-1103
%P 654-658
Markdown (Informal)
[Punctuation Prediction for Unsegmented Transcript Based on Word Vector](https://aclanthology.org/L16-1103) (Che et al., LREC 2016)
ACL