% Conference paper from the LREC'16 proceedings. Field values are brace-delimited;
% BibTeX reads {...} and "..." field values identically.
@inproceedings{sergienko-etal-2016-comparative,
  title     = {A Comparative Study of Text Preprocessing Approaches for Topic Detection of User Utterances},
  author    = {Sergienko, Roman and
               Shan, Muhammad and
               Minker, Wolfgang},
  editor    = {Calzolari, Nicoletta and
               Choukri, Khalid and
               Declerck, Thierry and
               Goggi, Sara and
               Grobelnik, Marko and
               Maegaard, Bente and
               Mariani, Joseph and
               Mazo, Helene and
               Moreno, Asuncion and
               Odijk, Jan and
               Piperidis, Stelios},
  booktitle = {Proceedings of the Tenth International Conference on Language Resources and Evaluation ({LREC}'16)},
  month     = may,
  year      = {2016},
  address   = {Portoro{\v{z}}, Slovenia},
  publisher = {European Language Resources Association (ELRA)},
  url       = {https://aclanthology.org/L16-1288},
  pages     = {1826--1831},
  abstract  = {The paper describes a comparative study of existing and novel text preprocessing and classification techniques for domain detection of user utterances. Two corpora are considered. The first one contains customer calls to a call centre for further call routing; the second one contains answers of call centre employees with different kinds of customer orientation behaviour. Seven different unsupervised and supervised term weighting methods were applied. The collective use of term weighting methods is proposed for classification effectiveness improvement. Four different dimensionality reduction methods were applied: stop-words filtering with stemming, feature selection based on term weights, feature transformation based on term clustering, and a novel feature transformation method based on terms belonging to classes. As classification algorithms we used k-NN and a SVM-based algorithm. The numerical experiments have shown that the simultaneous use of the novel proposed approaches (collectives of term weighting methods and the novel feature transformation method) allows reaching the high classification results with very small number of features.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS record for a single conference paper: the paper's own metadata comes first,
     followed by the hosting proceedings volume as a relatedItem of type "host". -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="sergienko-etal-2016-comparative">
    <titleInfo>
      <title>A Comparative Study of Text Preprocessing Approaches for Topic Detection of User Utterances</title>
    </titleInfo>

    <!-- Paper authors -->
    <name type="personal">
      <namePart type="given">Roman</namePart>
      <namePart type="family">Sergienko</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Muhammad</namePart>
      <namePart type="family">Shan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Wolfgang</namePart>
      <namePart type="family">Minker</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>

    <originInfo>
      <dateIssued>2016-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host volume: proceedings title, editors, publisher and place -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC’16)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Nicoletta</namePart>
        <namePart type="family">Calzolari</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Khalid</namePart>
        <namePart type="family">Choukri</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Thierry</namePart>
        <namePart type="family">Declerck</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sara</namePart>
        <namePart type="family">Goggi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Marko</namePart>
        <namePart type="family">Grobelnik</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Bente</namePart>
        <namePart type="family">Maegaard</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Joseph</namePart>
        <namePart type="family">Mariani</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Helene</namePart>
        <namePart type="family">Mazo</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Asuncion</namePart>
        <namePart type="family">Moreno</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jan</namePart>
        <namePart type="family">Odijk</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Stelios</namePart>
        <namePart type="family">Piperidis</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>European Language Resources Association (ELRA)</publisher>
        <place>
          <placeTerm type="text">Portorož, Slovenia</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>

    <abstract>The paper describes a comparative study of existing and novel text preprocessing and classification techniques for domain detection of user utterances. Two corpora are considered. The first one contains customer calls to a call centre for further call routing; the second one contains answers of call centre employees with different kinds of customer orientation behaviour. Seven different unsupervised and supervised term weighting methods were applied. The collective use of term weighting methods is proposed for classification effectiveness improvement. Four different dimensionality reduction methods were applied: stop-words filtering with stemming, feature selection based on term weights, feature transformation based on term clustering, and a novel feature transformation method based on terms belonging to classes. As classification algorithms we used k-NN and a SVM-based algorithm. The numerical experiments have shown that the simultaneous use of the novel proposed approaches (collectives of term weighting methods and the novel feature transformation method) allows reaching the high classification results with very small number of features.</abstract>

    <!-- Identifiers and locators: citation key, landing URL, date and page extent -->
    <identifier type="citekey">sergienko-etal-2016-comparative</identifier>
    <location>
      <url>https://aclanthology.org/L16-1288</url>
    </location>
    <part>
      <date>2016-05</date>
      <extent unit="page">
        <start>1826</start>
        <end>1831</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T A Comparative Study of Text Preprocessing Approaches for Topic Detection of User Utterances
%A Sergienko, Roman
%A Shan, Muhammad
%A Minker, Wolfgang
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Declerck, Thierry
%Y Goggi, Sara
%Y Grobelnik, Marko
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Mazo, Helene
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC’16)
%D 2016
%8 May
%I European Language Resources Association (ELRA)
%C Portorož, Slovenia
%F sergienko-etal-2016-comparative
%X The paper describes a comparative study of existing and novel text preprocessing and classification techniques for domain detection of user utterances. Two corpora are considered. The first one contains customer calls to a call centre for further call routing; the second one contains answers of call centre employees with different kinds of customer orientation behaviour. Seven different unsupervised and supervised term weighting methods were applied. The collective use of term weighting methods is proposed for classification effectiveness improvement. Four different dimensionality reduction methods were applied: stop-words filtering with stemming, feature selection based on term weights, feature transformation based on term clustering, and a novel feature transformation method based on terms belonging to classes. As classification algorithms we used k-NN and a SVM-based algorithm. The numerical experiments have shown that the simultaneous use of the novel proposed approaches (collectives of term weighting methods and the novel feature transformation method) allows reaching the high classification results with very small number of features.
%U https://aclanthology.org/L16-1288
%P 1826-1831
Markdown (Informal)
[A Comparative Study of Text Preprocessing Approaches for Topic Detection of User Utterances](https://aclanthology.org/L16-1288) (Sergienko et al., LREC 2016)
ACL