@inproceedings{guevara-etal-2024-philippine,
title = "{P}hilippine Languages Database: A Multilingual Speech Corpora for Developing Systems for Low-Resource Languages",
author = "Guevara, Rowena Cristina L. and
Cajote, Rhandley D. and
Bayona, Michael Gringo Angelo R. and
Lucas, Crisron Rudolf G.",
editor = "Melero, Maite and
Sakti, Sakriani and
Soria, Claudia",
booktitle = "Proceedings of the 3rd Annual Meeting of the Special Interest Group on Under-resourced Languages @ LREC-COLING 2024",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.sigul-1.32",
pages = "264--271",
abstract = "Previous efforts to collect Filipino speech were done in the development of Filipino-Speech Corpus, TAGCO, and Filipino-Bisaya speech corpus. These corpora, however, are either domain-specific, non-parallel, non-multilingual or relatively insufficient for the development of state-of-the-art Automatic Speech Recognizers (ASR) and Text-To-Speech Systems (TTS) which usually requires hundreds of hours of speech data. This paper presents a multilingual corpora for the Philippine languages namely: Filipino, English, Cebuano, Kapampangan, Hiligaynon, Ilokano, Bikolano, Waray, and Tausug. PLD includes over 454 hours of recordings from speakers of the ten languages, covering multiple domains in news, medical, education, tourism and spontaneous speech. The applicability of the corpus has also been demonstrated in adult and children ASR, phoneme transcriber, voice conversion, and TTS applications.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="guevara-etal-2024-philippine">
<titleInfo>
<title>Philippine Languages Database: A Multilingual Speech Corpora for Developing Systems for Low-Resource Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Rowena</namePart>
<namePart type="given">Cristina</namePart>
<namePart type="given">L</namePart>
<namePart type="family">Guevara</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rhandley</namePart>
<namePart type="given">D</namePart>
<namePart type="family">Cajote</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michael</namePart>
<namePart type="given">Gringo</namePart>
<namePart type="given">Angelo</namePart>
<namePart type="given">R</namePart>
<namePart type="family">Bayona</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Crisron</namePart>
<namePart type="given">Rudolf</namePart>
<namePart type="given">G</namePart>
<namePart type="family">Lucas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 3rd Annual Meeting of the Special Interest Group on Under-resourced Languages @ LREC-COLING 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maite</namePart>
<namePart type="family">Melero</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Claudia</namePart>
<namePart type="family">Soria</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Previous efforts to collect Filipino speech were done in the development of Filipino-Speech Corpus, TAGCO, and Filipino-Bisaya speech corpus. These corpora, however, are either domain-specific, non-parallel, non-multilingual or relatively insufficient for the development of state-of-the-art Automatic Speech Recognizers (ASR) and Text-To-Speech Systems (TTS) which usually requires hundreds of hours of speech data. This paper presents a multilingual corpora for the Philippine languages namely: Filipino, English, Cebuano, Kapampangan, Hiligaynon, Ilokano, Bikolano, Waray, and Tausug. PLD includes over 454 hours of recordings from speakers of the ten languages, covering multiple domains in news, medical, education, tourism and spontaneous speech. The applicability of the corpus has also been demonstrated in adult and children ASR, phoneme transcriber, voice conversion, and TTS applications.</abstract>
<identifier type="citekey">guevara-etal-2024-philippine</identifier>
<location>
<url>https://aclanthology.org/2024.sigul-1.32</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>264</start>
<end>271</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Philippine Languages Database: A Multilingual Speech Corpora for Developing Systems for Low-Resource Languages
%A Guevara, Rowena Cristina L.
%A Cajote, Rhandley D.
%A Bayona, Michael Gringo Angelo R.
%A Lucas, Crisron Rudolf G.
%Y Melero, Maite
%Y Sakti, Sakriani
%Y Soria, Claudia
%S Proceedings of the 3rd Annual Meeting of the Special Interest Group on Under-resourced Languages @ LREC-COLING 2024
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F guevara-etal-2024-philippine
%X Previous efforts to collect Filipino speech were done in the development of Filipino-Speech Corpus, TAGCO, and Filipino-Bisaya speech corpus. These corpora, however, are either domain-specific, non-parallel, non-multilingual or relatively insufficient for the development of state-of-the-art Automatic Speech Recognizers (ASR) and Text-To-Speech Systems (TTS) which usually requires hundreds of hours of speech data. This paper presents a multilingual corpora for the Philippine languages namely: Filipino, English, Cebuano, Kapampangan, Hiligaynon, Ilokano, Bikolano, Waray, and Tausug. PLD includes over 454 hours of recordings from speakers of the ten languages, covering multiple domains in news, medical, education, tourism and spontaneous speech. The applicability of the corpus has also been demonstrated in adult and children ASR, phoneme transcriber, voice conversion, and TTS applications.
%U https://aclanthology.org/2024.sigul-1.32
%P 264-271
Markdown (Informal)
[Philippine Languages Database: A Multilingual Speech Corpora for Developing Systems for Low-Resource Languages](https://aclanthology.org/2024.sigul-1.32) (Guevara et al., SIGUL-WS 2024)
ACL