@inproceedings{vallet-etal-2016-speech,
title = "Speech Trax: A Bottom to the Top Approach for Speaker Tracking and Indexing in an Archiving Context",
author = "Vallet, F{\'e}licien and
Uro, Jim and
Andriamakaoly, J{\'e}r{\'e}my and
Nabi, Hakim and
Derval, Mathieu and
Carrive, Jean",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Declerck, Thierry and
Goggi, Sara and
Grobelnik, Marko and
Maegaard, Bente and
Mariani, Joseph and
Mazo, Helene and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Tenth International Conference on Language Resources and Evaluation ({LREC}'16)",
month = may,
year = "2016",
address = "Portoro{\v{z}}, Slovenia",
publisher = "European Language Resources Association (ELRA)",
url = "https://aclanthology.org/L16-1318",
pages = "2011--2016",
abstract = "With the increasing amount of audiovisual and digital data deriving from televisual and radiophonic sources, professional archives such as INA, France{'}s national audiovisual institute, acknowledge a growing need for efficient indexing tools. In this paper, we describe the Speech Trax system that aims at analyzing the audio content of TV and radio documents. In particular, we focus on the speaker tracking task that is very valuable for indexing purposes. First, we detail the overall architecture of the system and show the results obtained on a large-scale experiment, the largest to our knowledge for this type of content (about 1,300 speakers). Then, we present the Speech Trax demonstrator that gathers the results of various automatic speech processing techniques on top of our speaker tracking system (speaker diarization, speech transcription, etc.). Finally, we provide insight on the obtained performances and suggest hints for future improvements.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="vallet-etal-2016-speech">
<titleInfo>
<title>Speech Trax: A Bottom to the Top Approach for Speaker Tracking and Indexing in an Archiving Context</title>
</titleInfo>
<name type="personal">
<namePart type="given">Félicien</namePart>
<namePart type="family">Vallet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jim</namePart>
<namePart type="family">Uro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jérémy</namePart>
<namePart type="family">Andriamakaoly</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hakim</namePart>
<namePart type="family">Nabi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mathieu</namePart>
<namePart type="family">Derval</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jean</namePart>
<namePart type="family">Carrive</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2016-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC’16)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Goggi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marko</namePart>
<namePart type="family">Grobelnik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Helene</namePart>
<namePart type="family">Mazo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Portorož, Slovenia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>With the increasing amount of audiovisual and digital data deriving from televisual and radiophonic sources, professional archives such as INA, France’s national audiovisual institute, acknowledge a growing need for efficient indexing tools. In this paper, we describe the Speech Trax system that aims at analyzing the audio content of TV and radio documents. In particular, we focus on the speaker tracking task that is very valuable for indexing purposes. First, we detail the overall architecture of the system and show the results obtained on a large-scale experiment, the largest to our knowledge for this type of content (about 1,300 speakers). Then, we present the Speech Trax demonstrator that gathers the results of various automatic speech processing techniques on top of our speaker tracking system (speaker diarization, speech transcription, etc.). Finally, we provide insight on the obtained performances and suggest hints for future improvements.</abstract>
<identifier type="citekey">vallet-etal-2016-speech</identifier>
<location>
<url>https://aclanthology.org/L16-1318</url>
</location>
<part>
<date>2016-05</date>
<extent unit="page">
<start>2011</start>
<end>2016</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Speech Trax: A Bottom to the Top Approach for Speaker Tracking and Indexing in an Archiving Context
%A Vallet, Félicien
%A Uro, Jim
%A Andriamakaoly, Jérémy
%A Nabi, Hakim
%A Derval, Mathieu
%A Carrive, Jean
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Declerck, Thierry
%Y Goggi, Sara
%Y Grobelnik, Marko
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Mazo, Helene
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC’16)
%D 2016
%8 May
%I European Language Resources Association (ELRA)
%C Portorož, Slovenia
%F vallet-etal-2016-speech
%X With the increasing amount of audiovisual and digital data deriving from televisual and radiophonic sources, professional archives such as INA, France’s national audiovisual institute, acknowledge a growing need for efficient indexing tools. In this paper, we describe the Speech Trax system that aims at analyzing the audio content of TV and radio documents. In particular, we focus on the speaker tracking task that is very valuable for indexing purposes. First, we detail the overall architecture of the system and show the results obtained on a large-scale experiment, the largest to our knowledge for this type of content (about 1,300 speakers). Then, we present the Speech Trax demonstrator that gathers the results of various automatic speech processing techniques on top of our speaker tracking system (speaker diarization, speech transcription, etc.). Finally, we provide insight on the obtained performances and suggest hints for future improvements.
%U https://aclanthology.org/L16-1318
%P 2011-2016
Markdown (Informal)
[Speech Trax: A Bottom to the Top Approach for Speaker Tracking and Indexing in an Archiving Context](https://aclanthology.org/L16-1318) (Vallet et al., LREC 2016)
ACL