@inproceedings{delgado-etal-2020-safe,
title = "The {SAFE}-{T} Corpus: A New Resource for Simulated Public Safety Communications",
author = "Delgado, Dana and
Walker, Kevin and
Strassel, Stephanie and
Jones, Karen and
Caruso, Christopher and
Graff, David",
editor = "Calzolari, Nicoletta and
B{\'e}chet, Fr{\'e}d{\'e}ric and
Blache, Philippe and
Choukri, Khalid and
Cieri, Christopher and
Declerck, Thierry and
Goggi, Sara and
Isahara, Hitoshi and
Maegaard, Bente and
Mariani, Joseph and
Mazo, H{\'e}l{\`e}ne and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Twelfth Language Resources and Evaluation Conference",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2020.lrec-1.794/",
pages = "6450--6457",
language = "eng",
ISBN = "979-10-95546-34-4",
abstract = "We introduce a new resource, the SAFE-T (Speech Analysis for Emergency Response Technology) Corpus, designed to simulate first-responder communications by inducing high vocal effort and urgent speech with situational background noise in a game-based collection protocol. Linguistic Data Consortium developed the SAFE-T Corpus to support the NIST (National Institute of Standards and Technology) OpenSAT (Speech Analytic Technologies) evaluation series, whose goal is to advance speech analytic technologies including automatic speech recognition, speech activity detection and keyword search in multiple domains including simulated public safety communications data. The corpus comprises over 300 hours of audio from 115 unique speakers engaged in a collaborative problem-solving activity representative of public safety communications in terms of speech content, noise types and noise levels. Portions of the corpus have been used in the OpenSAT 2019 evaluation and the full corpus will be published in the LDC catalog. We describe the design and implementation of the SAFE-T Corpus collection, discuss the approach of capturing spontaneous speech from study participants through game-based speech collection, and report on the collection results including several challenges associated with the collection."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="delgado-etal-2020-safe">
<titleInfo>
<title>The SAFE-T Corpus: A New Resource for Simulated Public Safety Communications</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dana</namePart>
<namePart type="family">Delgado</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kevin</namePart>
<namePart type="family">Walker</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stephanie</namePart>
<namePart type="family">Strassel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Karen</namePart>
<namePart type="family">Jones</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christopher</namePart>
<namePart type="family">Caruso</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Graff</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Twelfth Language Resources and Evaluation Conference</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Frédéric</namePart>
<namePart type="family">Béchet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philippe</namePart>
<namePart type="family">Blache</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christopher</namePart>
<namePart type="family">Cieri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Goggi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hitoshi</namePart>
<namePart type="family">Isahara</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hélène</namePart>
<namePart type="family">Mazo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-34-4</identifier>
</relatedItem>
<abstract>We introduce a new resource, the SAFE-T (Speech Analysis for Emergency Response Technology) Corpus, designed to simulate first-responder communications by inducing high vocal effort and urgent speech with situational background noise in a game-based collection protocol. Linguistic Data Consortium developed the SAFE-T Corpus to support the NIST (National Institute of Standards and Technology) OpenSAT (Speech Analytic Technologies) evaluation series, whose goal is to advance speech analytic technologies including automatic speech recognition, speech activity detection and keyword search in multiple domains including simulated public safety communications data. The corpus comprises over 300 hours of audio from 115 unique speakers engaged in a collaborative problem-solving activity representative of public safety communications in terms of speech content, noise types and noise levels. Portions of the corpus have been used in the OpenSAT 2019 evaluation and the full corpus will be published in the LDC catalog. We describe the design and implementation of the SAFE-T Corpus collection, discuss the approach of capturing spontaneous speech from study participants through game-based speech collection, and report on the collection results including several challenges associated with the collection.</abstract>
<identifier type="citekey">delgado-etal-2020-safe</identifier>
<location>
<url>https://aclanthology.org/2020.lrec-1.794/</url>
</location>
<part>
<date>2020-05</date>
<extent unit="page">
<start>6450</start>
<end>6457</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T The SAFE-T Corpus: A New Resource for Simulated Public Safety Communications
%A Delgado, Dana
%A Walker, Kevin
%A Strassel, Stephanie
%A Jones, Karen
%A Caruso, Christopher
%A Graff, David
%Y Calzolari, Nicoletta
%Y Béchet, Frédéric
%Y Blache, Philippe
%Y Choukri, Khalid
%Y Cieri, Christopher
%Y Declerck, Thierry
%Y Goggi, Sara
%Y Isahara, Hitoshi
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Mazo, Hélène
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Twelfth Language Resources and Evaluation Conference
%D 2020
%8 May
%I European Language Resources Association
%C Marseille, France
%@ 979-10-95546-34-4
%G eng
%F delgado-etal-2020-safe
%X We introduce a new resource, the SAFE-T (Speech Analysis for Emergency Response Technology) Corpus, designed to simulate first-responder communications by inducing high vocal effort and urgent speech with situational background noise in a game-based collection protocol. Linguistic Data Consortium developed the SAFE-T Corpus to support the NIST (National Institute of Standards and Technology) OpenSAT (Speech Analytic Technologies) evaluation series, whose goal is to advance speech analytic technologies including automatic speech recognition, speech activity detection and keyword search in multiple domains including simulated public safety communications data. The corpus comprises over 300 hours of audio from 115 unique speakers engaged in a collaborative problem-solving activity representative of public safety communications in terms of speech content, noise types and noise levels. Portions of the corpus have been used in the OpenSAT 2019 evaluation and the full corpus will be published in the LDC catalog. We describe the design and implementation of the SAFE-T Corpus collection, discuss the approach of capturing spontaneous speech from study participants through game-based speech collection, and report on the collection results including several challenges associated with the collection.
%U https://aclanthology.org/2020.lrec-1.794/
%P 6450-6457
Markdown (Informal)
[The SAFE-T Corpus: A New Resource for Simulated Public Safety Communications](https://aclanthology.org/2020.lrec-1.794/) (Delgado et al., LREC 2020)
ACL