@inproceedings{garnier-rizet-etal-2008-callsurf,
title = "{C}all{S}urf: Automatic Transcription, Indexing and Structuration of Call Center Conversational Speech for Knowledge Extraction and Query by Content",
author = "Garnier-Rizet, Martine and
Adda, Gilles and
Cailliau, Frederik and
Guillemin-Lanne, Sylvie and
Waast-Richard, Claire and
Lamel, Lori and
Vanni, Stephan and
Waast-Richard, Claire",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Maegaard, Bente and
Mariani, Joseph and
Odijk, Jan and
Piperidis, Stelios and
Tapias, Daniel",
booktitle = "Proceedings of the Sixth International Conference on Language Resources and Evaluation ({LREC}'08)",
month = may,
year = "2008",
address = "Marrakech, Morocco",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2008/pdf/652_paper.pdf",
abstract = "Being the clients first interface, call centres worldwide contain a huge amount of information of all kind under the form of conversational speech. If accessible, this information can be used to detect eg. major events and organizational flaws, improve customer relations and marketing strategies. An efficient way to exploit the unstructured data of telephone calls is data-mining, but current techniques apply on text only. The CallSurf project gathers a number of academic and industrial partners covering the complete platform, from automatic transcription to information retrieval and data mining. This paper concentrates on the speech recognition module as it discusses the collection, the manual transcription of the training corpus and the techniques used to build the language model. The NLP techniques used to pre-process the transcribed corpus for data mining are POS tagging, lemmatization, noun group and named entity recognition. Some of them have been especially adapted to the conversational speech characteristics. POS tagging and preliminary data mining results obtained on the manually transcribed corpus are briefly discussed.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="garnier-rizet-etal-2008-callsurf">
<titleInfo>
<title>CallSurf: Automatic Transcription, Indexing and Structuration of Call Center Conversational Speech for Knowledge Extraction and Query by Content</title>
</titleInfo>
<name type="personal">
<namePart type="given">Martine</namePart>
<namePart type="family">Garnier-Rizet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gilles</namePart>
<namePart type="family">Adda</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Frederik</namePart>
<namePart type="family">Cailliau</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sylvie</namePart>
<namePart type="family">Guillemin-Lanne</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Claire</namePart>
<namePart type="family">Waast-Richard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lori</namePart>
<namePart type="family">Lamel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stephan</namePart>
<namePart type="family">Vanni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2008-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC’08)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Tapias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Marrakech, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Being the clients first interface, call centres worldwide contain a huge amount of information of all kind under the form of conversational speech. If accessible, this information can be used to detect eg. major events and organizational flaws, improve customer relations and marketing strategies. An efficient way to exploit the unstructured data of telephone calls is data-mining, but current techniques apply on text only. The CallSurf project gathers a number of academic and industrial partners covering the complete platform, from automatic transcription to information retrieval and data mining. This paper concentrates on the speech recognition module as it discusses the collection, the manual transcription of the training corpus and the techniques used to build the language model. The NLP techniques used to pre-process the transcribed corpus for data mining are POS tagging, lemmatization, noun group and named entity recognition. Some of them have been especially adapted to the conversational speech characteristics. POS tagging and preliminary data mining results obtained on the manually transcribed corpus are briefly discussed.</abstract>
<identifier type="citekey">garnier-rizet-etal-2008-callsurf</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/652_paper.pdf</url>
</location>
<part>
<date>2008-05</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T CallSurf: Automatic Transcription, Indexing and Structuration of Call Center Conversational Speech for Knowledge Extraction and Query by Content
%A Garnier-Rizet, Martine
%A Adda, Gilles
%A Cailliau, Frederik
%A Guillemin-Lanne, Sylvie
%A Waast-Richard, Claire
%A Lamel, Lori
%A Vanni, Stephan
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Odijk, Jan
%Y Piperidis, Stelios
%Y Tapias, Daniel
%S Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC’08)
%D 2008
%8 May
%I European Language Resources Association (ELRA)
%C Marrakech, Morocco
%F garnier-rizet-etal-2008-callsurf
%X Being the clients first interface, call centres worldwide contain a huge amount of information of all kind under the form of conversational speech. If accessible, this information can be used to detect eg. major events and organizational flaws, improve customer relations and marketing strategies. An efficient way to exploit the unstructured data of telephone calls is data-mining, but current techniques apply on text only. The CallSurf project gathers a number of academic and industrial partners covering the complete platform, from automatic transcription to information retrieval and data mining. This paper concentrates on the speech recognition module as it discusses the collection, the manual transcription of the training corpus and the techniques used to build the language model. The NLP techniques used to pre-process the transcribed corpus for data mining are POS tagging, lemmatization, noun group and named entity recognition. Some of them have been especially adapted to the conversational speech characteristics. POS tagging and preliminary data mining results obtained on the manually transcribed corpus are briefly discussed.
%U http://www.lrec-conf.org/proceedings/lrec2008/pdf/652_paper.pdf
Markdown (Informal)
[CallSurf: Automatic Transcription, Indexing and Structuration of Call Center Conversational Speech for Knowledge Extraction and Query by Content](http://www.lrec-conf.org/proceedings/lrec2008/pdf/652_paper.pdf) (Garnier-Rizet et al., LREC 2008)
ACL
- Martine Garnier-Rizet, Gilles Adda, Frederik Cailliau, Sylvie Guillemin-Lanne, Claire Waast-Richard, Lori Lamel, Stephan Vanni, and Claire Waast-Richard. 2008. CallSurf: Automatic Transcription, Indexing and Structuration of Call Center Conversational Speech for Knowledge Extraction and Query by Content. In Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC'08), Marrakech, Morocco. European Language Resources Association (ELRA).