@inproceedings{luengo-etal-2010-modified,
title = "Modified {LTSE}-{VAD} Algorithm for Applications Requiring Reduced Silence Frame Misclassification",
author = "Luengo, Iker and
Navas, Eva and
Odriozola, Igor and
Saratxaga, Ibon and
Hernaez, Inmaculada and
Sainz, I{\~n}aki and
Erro, Daniel",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Maegaard, Bente and
Mariani, Joseph and
Odijk, Jan and
Piperidis, Stelios and
Rosner, Mike and
Tapias, Daniel",
booktitle = "Proceedings of the Seventh International Conference on Language Resources and Evaluation ({LREC}'10)",
month = may,
year = "2010",
address = "Valletta, Malta",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2010/pdf/741_Paper.pdf",
abstract = "The LTSE-VAD is one of the best known algorithms for voice activity detection. In this paper we present a modified version of this algorithm, that makes the VAD decision not taking into account the estimated background noise level, but the signal to noise ratio (SNR). This makes the algorithm robust not only to noise level changes, but also to signal level changes. We compare the modified algorithm with the original one, and with three other standard VAD systems. The results show that the modified version gets the lowest silence misclassification rate, while maintaining a reasonably low speech misclassification rate. As a result, this algorithm is more suitable for identification tasks, such as speaker or emotion recognition, where silence misclassification can be very harmful. A series of automatic emotion identification experiments are also carried out, proving that the modified version of the algorithm helps increasing the correct emotion classification rate.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="luengo-etal-2010-modified">
<titleInfo>
<title>Modified LTSE-VAD Algorithm for Applications Requiring Reduced Silence Frame Misclassification</title>
</titleInfo>
<name type="personal">
<namePart type="given">Iker</namePart>
<namePart type="family">Luengo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eva</namePart>
<namePart type="family">Navas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Igor</namePart>
<namePart type="family">Odriozola</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ibon</namePart>
<namePart type="family">Saratxaga</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Inmaculada</namePart>
<namePart type="family">Hernaez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Iñaki</namePart>
<namePart type="family">Sainz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Erro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2010-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC’10)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mike</namePart>
<namePart type="family">Rosner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Tapias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Valletta, Malta</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The LTSE-VAD is one of the best known algorithms for voice activity detection. In this paper we present a modified version of this algorithm, that makes the VAD decision not taking into account the estimated background noise level, but the signal to noise ratio (SNR). This makes the algorithm robust not only to noise level changes, but also to signal level changes. We compare the modified algorithm with the original one, and with three other standard VAD systems. The results show that the modified version gets the lowest silence misclassification rate, while maintaining a reasonably low speech misclassification rate. As a result, this algorithm is more suitable for identification tasks, such as speaker or emotion recognition, where silence misclassification can be very harmful. A series of automatic emotion identification experiments are also carried out, proving that the modified version of the algorithm helps increasing the correct emotion classification rate.</abstract>
<identifier type="citekey">luengo-etal-2010-modified</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/741_Paper.pdf</url>
</location>
<part>
<date>2010-05</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Modified LTSE-VAD Algorithm for Applications Requiring Reduced Silence Frame Misclassification
%A Luengo, Iker
%A Navas, Eva
%A Odriozola, Igor
%A Saratxaga, Ibon
%A Hernaez, Inmaculada
%A Sainz, Iñaki
%A Erro, Daniel
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Odijk, Jan
%Y Piperidis, Stelios
%Y Rosner, Mike
%Y Tapias, Daniel
%S Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC’10)
%D 2010
%8 May
%I European Language Resources Association (ELRA)
%C Valletta, Malta
%F luengo-etal-2010-modified
%X The LTSE-VAD is one of the best known algorithms for voice activity detection. In this paper we present a modified version of this algorithm, that makes the VAD decision not taking into account the estimated background noise level, but the signal to noise ratio (SNR). This makes the algorithm robust not only to noise level changes, but also to signal level changes. We compare the modified algorithm with the original one, and with three other standard VAD systems. The results show that the modified version gets the lowest silence misclassification rate, while maintaining a reasonably low speech misclassification rate. As a result, this algorithm is more suitable for identification tasks, such as speaker or emotion recognition, where silence misclassification can be very harmful. A series of automatic emotion identification experiments are also carried out, proving that the modified version of the algorithm helps increasing the correct emotion classification rate.
%U http://www.lrec-conf.org/proceedings/lrec2010/pdf/741_Paper.pdf
Markdown (Informal)
[Modified LTSE-VAD Algorithm for Applications Requiring Reduced Silence Frame Misclassification](http://www.lrec-conf.org/proceedings/lrec2010/pdf/741_Paper.pdf) (Luengo et al., LREC 2010)
ACL