@inproceedings{levow-2024-assessing,
title = "Assessing Pre-Built Speaker Recognition Models for Endangered Language Data",
author = "Levow, Gina-Anne",
editor = "Melero, Maite and
Sakti, Sakriani and
Soria, Claudia",
booktitle = "Proceedings of the 3rd Annual Meeting of the Special Interest Group on Under-resourced Languages @ LREC-COLING 2024",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.sigul-1.4",
pages = "27--32",
abstract = "Significant research has focused on speaker recognition, determining which speaker is speaking in a segment of audio. However, few experiments have investigated speaker recognition for very low-resource or endangered languages. Furthermore, speaker recognition has the potential to support language documentation and revitalization efforts, making recordings more accessible to researchers and communities. Since endangered language datasets are too small to build competitive speaker representations from scratch, we investigate the application of large-scale pre-built speaker recognition models to bridge this gap. This paper compares four speaker recognition models on six diverse endangered language data sets. Comparisons contrast three recent neural network-based x-vector models and an earlier baseline i-vector model. Experiments demonstrate significantly stronger performance for some of the studied models. Further analysis highlights differences in effectiveness tied to the lengths of test audio segments and amount of data used for speaker modeling.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="levow-2024-assessing">
<titleInfo>
<title>Assessing Pre-Built Speaker Recognition Models for Endangered Language Data</title>
</titleInfo>
<name type="personal">
<namePart type="given">Gina-Anne</namePart>
<namePart type="family">Levow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 3rd Annual Meeting of the Special Interest Group on Under-resourced Languages @ LREC-COLING 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maite</namePart>
<namePart type="family">Melero</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Claudia</namePart>
<namePart type="family">Soria</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Significant research has focused on speaker recognition, determining which speaker is speaking in a segment of audio. However, few experiments have investigated speaker recognition for very low-resource or endangered languages. Furthermore, speaker recognition has the potential to support language documentation and revitalization efforts, making recordings more accessible to researchers and communities. Since endangered language datasets are too small to build competitive speaker representations from scratch, we investigate the application of large-scale pre-built speaker recognition models to bridge this gap. This paper compares four speaker recognition models on six diverse endangered language data sets. Comparisons contrast three recent neural network-based x-vector models and an earlier baseline i-vector model. Experiments demonstrate significantly stronger performance for some of the studied models. Further analysis highlights differences in effectiveness tied to the lengths of test audio segments and amount of data used for speaker modeling.</abstract>
<identifier type="citekey">levow-2024-assessing</identifier>
<location>
<url>https://aclanthology.org/2024.sigul-1.4</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>27</start>
<end>32</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Assessing Pre-Built Speaker Recognition Models for Endangered Language Data
%A Levow, Gina-Anne
%Y Melero, Maite
%Y Sakti, Sakriani
%Y Soria, Claudia
%S Proceedings of the 3rd Annual Meeting of the Special Interest Group on Under-resourced Languages @ LREC-COLING 2024
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F levow-2024-assessing
%X Significant research has focused on speaker recognition, determining which speaker is speaking in a segment of audio. However, few experiments have investigated speaker recognition for very low-resource or endangered languages. Furthermore, speaker recognition has the potential to support language documentation and revitalization efforts, making recordings more accessible to researchers and communities. Since endangered language datasets are too small to build competitive speaker representations from scratch, we investigate the application of large-scale pre-built speaker recognition models to bridge this gap. This paper compares four speaker recognition models on six diverse endangered language data sets. Comparisons contrast three recent neural network-based x-vector models and an earlier baseline i-vector model. Experiments demonstrate significantly stronger performance for some of the studied models. Further analysis highlights differences in effectiveness tied to the lengths of test audio segments and amount of data used for speaker modeling.
%U https://aclanthology.org/2024.sigul-1.4
%P 27-32
Markdown (Informal)
[Assessing Pre-Built Speaker Recognition Models for Endangered Language Data](https://aclanthology.org/2024.sigul-1.4) (Levow, SIGUL-WS 2024)
ACL