% Conference paper in the IWSLT 2024 proceedings (Anthology ID 2024.iwslt-1.18).
% All values are brace-delimited; month uses the predefined BibTeX macro.
@inproceedings{bamfo-odoom-etal-2024-speech,
  title     = {Speech Data from Radio Broadcasts for Low Resource Languages},
  author    = {Bamfo Odoom, Bismarck and
               Garcia, Paola Leibny and
               Hansanti, Prangthip and
               Barrault, Lo{\"i}c and
               Ropers, Christophe and
               Wiesner, Matthew and
               Murray, Kenton and
               Mourachko, Alex and
               Koehn, Philipp},
  editor    = {Salesky, Elizabeth and
               Federico, Marcello and
               Carpuat, Marine},
  booktitle = {Proceedings of the 21st International Conference on Spoken Language Translation ({IWSLT} 2024)},
  month     = aug,
  year      = {2024},
  address   = {Bangkok, Thailand (in-person and online)},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.iwslt-1.18/},
  doi       = {10.18653/v1/2024.iwslt-1.18},
  pages     = {134--139},
  abstract  = {We created a collection of speech data for 48 low resource languages. The corpus is extracted from radio broadcasts and processed with novel speech detection and language identification models based on a manually vetted subset of the audio for 10 languages. The data is made publicly available.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record; its ID matches the citekey identifier further down. -->
  <mods ID="bamfo-odoom-etal-2024-speech">
    <titleInfo>
      <title>Speech Data from Radio Broadcasts for Low Resource Languages</title>
    </titleInfo>
    <!-- Authors of the paper, one name element each -->
    <name type="personal">
      <namePart type="given">Bismarck</namePart>
      <namePart type="family">Bamfo Odoom</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Paola</namePart>
      <namePart type="given">Leibny</namePart>
      <namePart type="family">Garcia</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Prangthip</namePart>
      <namePart type="family">Hansanti</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Loïc</namePart>
      <namePart type="family">Barrault</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Christophe</namePart>
      <namePart type="family">Ropers</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Matthew</namePart>
      <namePart type="family">Wiesner</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Kenton</namePart>
      <namePart type="family">Murray</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Alex</namePart>
      <namePart type="family">Mourachko</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Philipp</namePart>
      <namePart type="family">Koehn</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-08</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher and place -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 21st International Conference on Spoken Language Translation (IWSLT 2024)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Elizabeth</namePart>
        <namePart type="family">Salesky</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Marcello</namePart>
        <namePart type="family">Federico</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Marine</namePart>
        <namePart type="family">Carpuat</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Bangkok, Thailand (in-person and online)</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>We created a collection of speech data for 48 low resource languages. The corpus is extracted from radio broadcasts and processed with novel speech detection and language identification models based on a manually vetted subset of the audio for 10 languages. The data is made publicly available.</abstract>
    <!-- Identifiers and landing page for the paper -->
    <identifier type="citekey">bamfo-odoom-etal-2024-speech</identifier>
    <identifier type="doi">10.18653/v1/2024.iwslt-1.18</identifier>
    <location>
      <url>https://aclanthology.org/2024.iwslt-1.18/</url>
    </location>
    <!-- Page range of the paper within the host volume -->
    <part>
      <date>2024-08</date>
      <extent unit="page">
        <start>134</start>
        <end>139</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Speech Data from Radio Broadcasts for Low Resource Languages
%A Bamfo Odoom, Bismarck
%A Garcia, Paola Leibny
%A Hansanti, Prangthip
%A Barrault, Loïc
%A Ropers, Christophe
%A Wiesner, Matthew
%A Murray, Kenton
%A Mourachko, Alex
%A Koehn, Philipp
%Y Salesky, Elizabeth
%Y Federico, Marcello
%Y Carpuat, Marine
%S Proceedings of the 21st International Conference on Spoken Language Translation (IWSLT 2024)
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand (in-person and online)
%F bamfo-odoom-etal-2024-speech
%X We created a collection of speech data for 48 low resource languages. The corpus is extracted from radio broadcasts and processed with novel speech detection and language identification models based on a manually vetted subset of the audio for 10 languages. The data is made publicly available.
%R 10.18653/v1/2024.iwslt-1.18
%U https://aclanthology.org/2024.iwslt-1.18/
%U https://doi.org/10.18653/v1/2024.iwslt-1.18
%P 134-139
Markdown (Informal)
[Speech Data from Radio Broadcasts for Low Resource Languages](https://aclanthology.org/2024.iwslt-1.18/) (Bamfo Odoom et al., IWSLT 2024)
ACL
- Bismarck Bamfo Odoom, Paola Leibny Garcia, Prangthip Hansanti, Loïc Barrault, Christophe Ropers, Matthew Wiesner, Kenton Murray, Alex Mourachko, and Philipp Koehn. 2024. Speech Data from Radio Broadcasts for Low Resource Languages. In Proceedings of the 21st International Conference on Spoken Language Translation (IWSLT 2024), pages 134–139, Bangkok, Thailand (in-person and online). Association for Computational Linguistics.