@inproceedings{mendonca-etal-2022-towards,
title = "Towards Speaker Verification for Crowdsourced Speech Collections",
author = "Mendonca, John and
Correia, Rui and
Louren{\c{c}}o, Mariana and
Freitas, Jo{\~a}o and
Trancoso, Isabel",
booktitle = "Proceedings of the Thirteenth Language Resources and Evaluation Conference",
month = jun,
year = "2022",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2022.lrec-1.637",
pages = "5929--5937",
abstract = "Crowdsourcing the collection of speech provides a scalable setting to access a customisable demographic according to each dataset{'}s needs. The correctness of speaker metadata is especially relevant for speaker-centred collections - ones that require the collection of a fixed amount of data per speaker. This paper identifies two different types of misalignment present in these collections: Multiple Accounts misalignment (different contributors map to the same speaker), and Multiple Speakers misalignment (multiple speakers map to the same contributor). Based on state-of-the-art approaches to Speaker Verification, this paper proposes an unsupervised method for measuring speaker metadata plausibility of a collection, i.e., evaluating the match (or lack thereof) between contributors and speakers. The solution presented is composed of an embedding extractor and a clustering module. Results indicate high precision in automatically classifying contributor alignment ({\textgreater}0.94).",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mendonca-etal-2022-towards">
<titleInfo>
<title>Towards Speaker Verification for Crowdsourced Speech Collections</title>
</titleInfo>
<name type="personal">
<namePart type="given">John</namePart>
<namePart type="family">Mendonca</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rui</namePart>
<namePart type="family">Correia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mariana</namePart>
<namePart type="family">Lourenço</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">João</namePart>
<namePart type="family">Freitas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Isabel</namePart>
<namePart type="family">Trancoso</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Thirteenth Language Resources and Evaluation Conference</title>
</titleInfo>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Crowdsourcing the collection of speech provides a scalable setting to access a customisable demographic according to each dataset’s needs. The correctness of speaker metadata is especially relevant for speaker-centred collections - ones that require the collection of a fixed amount of data per speaker. This paper identifies two different types of misalignment present in these collections: Multiple Accounts misalignment (different contributors map to the same speaker), and Multiple Speakers misalignment (multiple speakers map to the same contributor). Based on state-of-the-art approaches to Speaker Verification, this paper proposes an unsupervised method for measuring speaker metadata plausibility of a collection, i.e., evaluating the match (or lack thereof) between contributors and speakers. The solution presented is composed of an embedding extractor and a clustering module. Results indicate high precision in automatically classifying contributor alignment (\textgreater0.94).</abstract>
<identifier type="citekey">mendonca-etal-2022-towards</identifier>
<location>
<url>https://aclanthology.org/2022.lrec-1.637</url>
</location>
<part>
<date>2022-06</date>
<extent unit="page">
<start>5929</start>
<end>5937</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Towards Speaker Verification for Crowdsourced Speech Collections
%A Mendonca, John
%A Correia, Rui
%A Lourenço, Mariana
%A Freitas, João
%A Trancoso, Isabel
%S Proceedings of the Thirteenth Language Resources and Evaluation Conference
%D 2022
%8 June
%I European Language Resources Association
%C Marseille, France
%F mendonca-etal-2022-towards
%X Crowdsourcing the collection of speech provides a scalable setting to access a customisable demographic according to each dataset’s needs. The correctness of speaker metadata is especially relevant for speaker-centred collections - ones that require the collection of a fixed amount of data per speaker. This paper identifies two different types of misalignment present in these collections: Multiple Accounts misalignment (different contributors map to the same speaker), and Multiple Speakers misalignment (multiple speakers map to the same contributor). Based on state-of-the-art approaches to Speaker Verification, this paper proposes an unsupervised method for measuring speaker metadata plausibility of a collection, i.e., evaluating the match (or lack thereof) between contributors and speakers. The solution presented is composed of an embedding extractor and a clustering module. Results indicate high precision in automatically classifying contributor alignment (\textgreater0.94).
%U https://aclanthology.org/2022.lrec-1.637
%P 5929-5937
Markdown (Informal)
[Towards Speaker Verification for Crowdsourced Speech Collections](https://aclanthology.org/2022.lrec-1.637) (Mendonca et al., LREC 2022)
ACL