% Conference paper introducing the FalAI dataset (LREC-COLING 2024 proceedings).
% Acronyms are brace-protected as whole words so that styles which apply
% sentence casing keep their capitalisation.
@inproceedings{pineiro-martin-etal-2024-falai,
  title     = {{FalAI}: A Dataset for End-to-end Spoken Language Understanding in a Low-Resource Scenario},
  author    = {Pineiro-Martin, Andres and
               Garcia-Mateo, Carmen and
               Docio-Fernandez, Laura and
               Lopez-Perez, Maria del Carmen and
               Gandarela-Rodriguez, Jose},
  editor    = {Calzolari, Nicoletta and
               Kan, Min-Yen and
               Hoste, Veronique and
               Lenci, Alessandro and
               Sakti, Sakriani and
               Xue, Nianwen},
  booktitle = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation ({LREC-COLING} 2024)},
  month     = may,
  year      = {2024},
  address   = {Torino, Italia},
  publisher = {ELRA and ICCL},
  url       = {https://aclanthology.org/2024.lrec-main.624},
  pages     = {7107--7116},
  abstract  = {End-to-end (E2E) Spoken Language Understanding (SLU) systems infer structured information directly from the speech signal using a single model. Due to the success of virtual assistants and the increasing demand for speech interfaces, these architectures are being actively researched for their potential to improve system performance by exploiting acoustic information and avoiding the cascading errors of traditional architectures. However, these systems require large amounts of specific, well-labelled speech data for training, which is expensive to obtain even in English, where the number of public audio datasets for SLU is limited. In this paper, we release the FalAI dataset, the largest public SLU dataset in terms of hours (250 hours), recordings (260,000) and participants (over 10,000), which is also the first SLU dataset in Galician and the first to be obtained in a low-resource scenario. Furthermore, we present new measures of complexity for the text corpora, the strategies followed for the design, collection and validation of the dataset, and we define splits for noisy audio, hesitant audio and audio where the sentence has changed but the structured information is preserved. These novel splits provide a unique resource for testing SLU systems in challenging, real-world scenarios.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS record for the paper identified by citekey pineiro-martin-etal-2024-falai. -->
  <mods ID="pineiro-martin-etal-2024-falai">
    <titleInfo>
      <title>FalAI: A Dataset for End-to-end Spoken Language Understanding in a Low-Resource Scenario</title>
    </titleInfo>

    <!-- Authors, in publication order. -->
    <name type="personal">
      <namePart type="given">Andres</namePart>
      <namePart type="family">Pineiro-Martin</namePart>
      <role><roleTerm authority="marcrelator" type="text">author</roleTerm></role>
    </name>
    <name type="personal">
      <namePart type="given">Carmen</namePart>
      <namePart type="family">Garcia-Mateo</namePart>
      <role><roleTerm authority="marcrelator" type="text">author</roleTerm></role>
    </name>
    <name type="personal">
      <namePart type="given">Laura</namePart>
      <namePart type="family">Docio-Fernandez</namePart>
      <role><roleTerm authority="marcrelator" type="text">author</roleTerm></role>
    </name>
    <name type="personal">
      <namePart type="given">Maria</namePart>
      <namePart type="given">del</namePart>
      <namePart type="given">Carmen</namePart>
      <namePart type="family">Lopez-Perez</namePart>
      <role><roleTerm authority="marcrelator" type="text">author</roleTerm></role>
    </name>
    <name type="personal">
      <namePart type="given">Jose</namePart>
      <namePart type="family">Gandarela-Rodriguez</namePart>
      <role><roleTerm authority="marcrelator" type="text">author</roleTerm></role>
    </name>

    <originInfo>
      <dateIssued>2024-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume, its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Nicoletta</namePart>
        <namePart type="family">Calzolari</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Min-Yen</namePart>
        <namePart type="family">Kan</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Veronique</namePart>
        <namePart type="family">Hoste</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Alessandro</namePart>
        <namePart type="family">Lenci</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Sakriani</namePart>
        <namePart type="family">Sakti</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Nianwen</namePart>
        <namePart type="family">Xue</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <originInfo>
        <publisher>ELRA and ICCL</publisher>
        <place><placeTerm type="text">Torino, Italia</placeTerm></place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>

    <abstract>End-to-end (E2E) Spoken Language Understanding (SLU) systems infer structured information directly from the speech signal using a single model. Due to the success of virtual assistants and the increasing demand for speech interfaces, these architectures are being actively researched for their potential to improve system performance by exploiting acoustic information and avoiding the cascading errors of traditional architectures. However, these systems require large amounts of specific, well-labelled speech data for training, which is expensive to obtain even in English, where the number of public audio datasets for SLU is limited. In this paper, we release the FalAI dataset, the largest public SLU dataset in terms of hours (250 hours), recordings (260,000) and participants (over 10,000), which is also the first SLU dataset in Galician and the first to be obtained in a low-resource scenario. Furthermore, we present new measures of complexity for the text corpora, the strategies followed for the design, collection and validation of the dataset, and we define splits for noisy audio, hesitant audio and audio where the sentence has changed but the structured information is preserved. These novel splits provide a unique resource for testing SLU systems in challenging, real-world scenarios.</abstract>
    <identifier type="citekey">pineiro-martin-etal-2024-falai</identifier>
    <location><url>https://aclanthology.org/2024.lrec-main.624</url></location>

    <!-- Publication date and page range of the paper within the host volume. -->
    <part>
      <date>2024-05</date>
      <extent unit="page">
        <start>7107</start>
        <end>7116</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T FalAI: A Dataset for End-to-end Spoken Language Understanding in a Low-Resource Scenario
%A Pineiro-Martin, Andres
%A Garcia-Mateo, Carmen
%A Docio-Fernandez, Laura
%A Lopez-Perez, Maria del Carmen
%A Gandarela-Rodriguez, Jose
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F pineiro-martin-etal-2024-falai
%X End-to-end (E2E) Spoken Language Understanding (SLU) systems infer structured information directly from the speech signal using a single model. Due to the success of virtual assistants and the increasing demand for speech interfaces, these architectures are being actively researched for their potential to improve system performance by exploiting acoustic information and avoiding the cascading errors of traditional architectures. However, these systems require large amounts of specific, well-labelled speech data for training, which is expensive to obtain even in English, where the number of public audio datasets for SLU is limited. In this paper, we release the FalAI dataset, the largest public SLU dataset in terms of hours (250 hours), recordings (260,000) and participants (over 10,000), which is also the first SLU dataset in Galician and the first to be obtained in a low-resource scenario. Furthermore, we present new measures of complexity for the text corpora, the strategies followed for the design, collection and validation of the dataset, and we define splits for noisy audio, hesitant audio and audio where the sentence has changed but the structured information is preserved. These novel splits provide a unique resource for testing SLU systems in challenging, real-world scenarios.
%U https://aclanthology.org/2024.lrec-main.624
%P 7107-7116
Markdown (Informal)
[FalAI: A Dataset for End-to-end Spoken Language Understanding in a Low-Resource Scenario](https://aclanthology.org/2024.lrec-main.624) (Pineiro-Martin et al., LREC-COLING 2024)
ACL