% Conference paper in the IWSLT 2024 proceedings (URL and DOI suffix: 2024.iwslt-1.18).
% Field values use brace delimiters; the acronym in booktitle is brace-protected
% so that styles which re-case booktitle keep it in upper case.
% NOTE(review): "Paola Garcia Perera, Leibny" may be mis-split (possibly family
%   "Garcia Perera", given "Leibny Paola") -- confirm against the paper before changing.
% NOTE(review): "Loic" may be an ASCII-folded form of an accented given name -- confirm.
@inproceedings{bamfo-odoom-etal-2024-speech,
  title     = {Speech Data from Radio Broadcasts for Low Resource Languages},
  author    = {Bamfo Odoom, Bismarck and
               Paola Garcia Perera, Leibny and
               Hansanti, Prangthip and
               Barrault, Loic and
               Ropers, Christophe and
               Wiesner, Matthew and
               Murray, Kenton and
               Mourachko, Alexandre and
               Koehn, Philipp},
  editor    = {Salesky, Elizabeth and
               Federico, Marcello and
               Carpuat, Marine},
  booktitle = {Proceedings of the 21st International Conference on Spoken Language Translation ({IWSLT} 2024)},
  month     = aug,
  year      = {2024},
  address   = {Bangkok, Thailand (in-person and online)},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.iwslt-1.18},
  doi       = {10.18653/v1/2024.iwslt-1.18},
  pages     = {134--139},
  abstract  = {We created a collection of speech data for 48 low resource languages. The corpus is extracted from radio broadcasts and processed with novel speech detection and language identification models based on a manually vetted subset of the audio for 10 languages. The data is made publicly available.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record for the paper; the ID matches the citekey identifier below. -->
  <mods ID="bamfo-odoom-etal-2024-speech">
    <titleInfo>
      <title>Speech Data from Radio Broadcasts for Low Resource Languages</title>
    </titleInfo>

    <!-- Authors of the paper: nine personal names, each with the "author" role. -->
    <name type="personal">
      <namePart type="given">Bismarck</namePart>
      <namePart type="family">Bamfo Odoom</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Leibny</namePart>
      <namePart type="family">Paola Garcia Perera</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Prangthip</namePart>
      <namePart type="family">Hansanti</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Loic</namePart>
      <namePart type="family">Barrault</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Christophe</namePart>
      <namePart type="family">Ropers</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Matthew</namePart>
      <namePart type="family">Wiesner</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Kenton</namePart>
      <namePart type="family">Murray</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Alexandre</namePart>
      <namePart type="family">Mourachko</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Philipp</namePart>
      <namePart type="family">Koehn</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>

    <!-- Issue date of the paper (year and month). -->
    <originInfo>
      <dateIssued>2024-08</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume, with its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 21st International Conference on Spoken Language Translation (IWSLT 2024)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Elizabeth</namePart>
        <namePart type="family">Salesky</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Marcello</namePart>
        <namePart type="family">Federico</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Marine</namePart>
        <namePart type="family">Carpuat</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Bangkok, Thailand (in-person and online)</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>

    <abstract>We created a collection of speech data for 48 low resource languages. The corpus is extracted from radio broadcasts and processed with novel speech detection and language identification models based on a manually vetted subset of the audio for 10 languages. The data is made publicly available.</abstract>

    <!-- Identifiers and access location for the paper. -->
    <identifier type="citekey">bamfo-odoom-etal-2024-speech</identifier>
    <identifier type="doi">10.18653/v1/2024.iwslt-1.18</identifier>
    <location>
      <url>https://aclanthology.org/2024.iwslt-1.18</url>
    </location>

    <!-- Position within the host volume: pages 134 to 139. -->
    <part>
      <date>2024-08</date>
      <extent unit="page">
        <start>134</start>
        <end>139</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Speech Data from Radio Broadcasts for Low Resource Languages
%A Bamfo Odoom, Bismarck
%A Paola Garcia Perera, Leibny
%A Hansanti, Prangthip
%A Barrault, Loic
%A Ropers, Christophe
%A Wiesner, Matthew
%A Murray, Kenton
%A Mourachko, Alexandre
%A Koehn, Philipp
%Y Salesky, Elizabeth
%Y Federico, Marcello
%Y Carpuat, Marine
%S Proceedings of the 21st International Conference on Spoken Language Translation (IWSLT 2024)
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand (in-person and online)
%F bamfo-odoom-etal-2024-speech
%X We created a collection of speech data for 48 low resource languages. The corpus is extracted from radio broadcasts and processed with novel speech detection and language identification models based on a manually vetted subset of the audio for 10 languages. The data is made publicly available.
%R 10.18653/v1/2024.iwslt-1.18
%U https://aclanthology.org/2024.iwslt-1.18
%U https://doi.org/10.18653/v1/2024.iwslt-1.18
%P 134-139
Markdown (Informal)
[Speech Data from Radio Broadcasts for Low Resource Languages](https://aclanthology.org/2024.iwslt-1.18) (Bamfo Odoom et al., IWSLT 2024)
ACL
- Bismarck Bamfo Odoom, Leibny Paola Garcia Perera, Prangthip Hansanti, Loic Barrault, Christophe Ropers, Matthew Wiesner, Kenton Murray, Alexandre Mourachko, and Philipp Koehn. 2024. Speech Data from Radio Broadcasts for Low Resource Languages. In Proceedings of the 21st International Conference on Spoken Language Translation (IWSLT 2024), pages 134–139, Bangkok, Thailand (in-person and online). Association for Computational Linguistics.