@inproceedings{ahmed-etal-2017-unsupervised,
title = "An Unsupervised Speaker Clustering Technique based on {SOM} and {I}-vectors for Speech Recognition Systems",
author = "Ahmed, Hany and
Elaraby, Mohamed and
Mousa, Abdullah M. and
Elhosiny, Mostafa and
Abdou, Sherif and
Rashwan, Mohsen",
editor = "Habash, Nizar and
Diab, Mona and
Darwish, Kareem and
El-Hajj, Wassim and
Al-Khalifa, Hend and
Bouamor, Houda and
Tomeh, Nadi and
El-Haj, Mahmoud and
Zaghouani, Wajdi",
booktitle = "Proceedings of the Third {A}rabic Natural Language Processing Workshop",
month = apr,
year = "2017",
address = "Valencia, Spain",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W17-1310",
doi = "10.18653/v1/W17-1310",
pages = "79--83",
abstract = "In this paper, we introduce an enhancement for speech recognition systems using an unsupervised speaker clustering technique. The proposed technique is mainly based on I-vectors and Self-Organizing Map Neural Network (SOM). The input to the proposed algorithm is a set of speech utterances. For each utterance, we extract 100-dimensional I-vector and then SOM is used to group the utterances to different speakers. In our experiments, we compared our technique with Normalized Cross Likelihood ratio Clustering (NCLR). Results show that the proposed technique reduces the speaker error rate in comparison with NCLR. Finally, we have experimented the effect of speaker clustering on Speaker Adaptive Training (SAT) in a speech recognition system implemented to test the performance of the proposed technique. It was noted that the proposed technique reduced the WER over clustering speakers with NCLR.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ahmed-etal-2017-unsupervised">
<titleInfo>
<title>An Unsupervised Speaker Clustering Technique based on SOM and I-vectors for Speech Recognition Systems</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hany</namePart>
<namePart type="family">Ahmed</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohamed</namePart>
<namePart type="family">Elaraby</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abdullah M.</namePart>
<namePart type="family">Mousa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mostafa</namePart>
<namePart type="family">Elhosiny</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sherif</namePart>
<namePart type="family">Abdou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohsen</namePart>
<namePart type="family">Rashwan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Third Arabic Natural Language Processing Workshop</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nizar</namePart>
<namePart type="family">Habash</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mona</namePart>
<namePart type="family">Diab</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kareem</namePart>
<namePart type="family">Darwish</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wassim</namePart>
<namePart type="family">El-Hajj</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hend</namePart>
<namePart type="family">Al-Khalifa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Houda</namePart>
<namePart type="family">Bouamor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nadi</namePart>
<namePart type="family">Tomeh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mahmoud</namePart>
<namePart type="family">El-Haj</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wajdi</namePart>
<namePart type="family">Zaghouani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Valencia, Spain</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we introduce an enhancement for speech recognition systems using an unsupervised speaker clustering technique. The proposed technique is mainly based on I-vectors and Self-Organizing Map Neural Network (SOM). The input to the proposed algorithm is a set of speech utterances. For each utterance, we extract 100-dimensional I-vector and then SOM is used to group the utterances to different speakers. In our experiments, we compared our technique with Normalized Cross Likelihood ratio Clustering (NCLR). Results show that the proposed technique reduces the speaker error rate in comparison with NCLR. Finally, we have experimented the effect of speaker clustering on Speaker Adaptive Training (SAT) in a speech recognition system implemented to test the performance of the proposed technique. It was noted that the proposed technique reduced the WER over clustering speakers with NCLR.</abstract>
<identifier type="citekey">ahmed-etal-2017-unsupervised</identifier>
<identifier type="doi">10.18653/v1/W17-1310</identifier>
<location>
<url>https://aclanthology.org/W17-1310</url>
</location>
<part>
<date>2017-04</date>
<extent unit="page">
<start>79</start>
<end>83</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T An Unsupervised Speaker Clustering Technique based on SOM and I-vectors for Speech Recognition Systems
%A Ahmed, Hany
%A Elaraby, Mohamed
%A Mousa, Abdullah M.
%A Elhosiny, Mostafa
%A Abdou, Sherif
%A Rashwan, Mohsen
%Y Habash, Nizar
%Y Diab, Mona
%Y Darwish, Kareem
%Y El-Hajj, Wassim
%Y Al-Khalifa, Hend
%Y Bouamor, Houda
%Y Tomeh, Nadi
%Y El-Haj, Mahmoud
%Y Zaghouani, Wajdi
%S Proceedings of the Third Arabic Natural Language Processing Workshop
%D 2017
%8 April
%I Association for Computational Linguistics
%C Valencia, Spain
%F ahmed-etal-2017-unsupervised
%X In this paper, we introduce an enhancement for speech recognition systems using an unsupervised speaker clustering technique. The proposed technique is mainly based on I-vectors and Self-Organizing Map Neural Network (SOM). The input to the proposed algorithm is a set of speech utterances. For each utterance, we extract 100-dimensional I-vector and then SOM is used to group the utterances to different speakers. In our experiments, we compared our technique with Normalized Cross Likelihood ratio Clustering (NCLR). Results show that the proposed technique reduces the speaker error rate in comparison with NCLR. Finally, we have experimented the effect of speaker clustering on Speaker Adaptive Training (SAT) in a speech recognition system implemented to test the performance of the proposed technique. It was noted that the proposed technique reduced the WER over clustering speakers with NCLR.
%R 10.18653/v1/W17-1310
%U https://aclanthology.org/W17-1310
%U https://doi.org/10.18653/v1/W17-1310
%P 79-83
Markdown (Informal)
[An Unsupervised Speaker Clustering Technique based on SOM and I-vectors for Speech Recognition Systems](https://aclanthology.org/W17-1310) (Ahmed et al., WANLP 2017)
ACL