% Conference paper: "Malayalam Speech Corpus" (WILDRE5 workshop, May 2020).
% The citation key is the same one recorded as the MODS citekey identifier
% and the EndNote %F tag further down in this file.
% Case-sensitive words in the title are brace-protected as whole words so
% that sentence-casing styles keep their capitals.
% NOTE(review): address carries "Marseille, France", the same place value the
% MODS and EndNote copies record; confirm whether this is meant as the
% venue or the publisher location before changing it.
@inproceedings{k-r-etal-2020-malayalam,
  title     = {{Malayalam} Speech Corpus: Design and Development for {Dravidian} Language},
  author    = {K R, Lekshmi and
               V S, Jithesh and
               Sherly, Elizabeth},
  editor    = {Jha, Girish Nath and
               Bali, Kalika and
               L., Sobha and
               Agrawal, S. S. and
               Ojha, Atul Kr.},
  booktitle = {Proceedings of the {WILDRE5}-- 5th Workshop on Indian Language Data: Resources and Evaluation},
  month     = may,
  year      = {2020},
  address   = {Marseille, France},
  publisher = {European Language Resources Association (ELRA)},
  url       = {https://aclanthology.org/2020.wildre-1.5},
  pages     = {25--28},
  abstract  = {To overpass the disparity between theory and applications in language-related technology in the text as well as speech and several other areas, a well-designed and well-developed corpus is essential. Several problems and issues encountered while developing a corpus, especially for low resource languages. The Malayalam Speech Corpus (MSC) is one of the first open speech corpora for Automatic Speech Recognition (ASR) research to the best of our knowledge. It consists of 250 hours of Agricultural speech data. We are providing a transcription file, lexicon and annotated speech along with the audio segment. It is available in future for public use upon request at {``}www.iiitmk.ac.in/vrclc/utilities/ml{\_}speechcorpus{''}. This paper details the development and collection process in the domain of agricultural speech corpora in the Malayalam Language.},
  language  = {English},
  isbn      = {979-10-95546-67-2},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record, ID k-r-etal-2020-malayalam. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="k-r-etal-2020-malayalam">
    <!-- Title of the paper itself. -->
    <titleInfo>
      <title>Malayalam Speech Corpus: Design and Development for Dravidian Language</title>
    </titleInfo>
    <!-- Personal names with the "author" role, in listed order. -->
    <name type="personal">
      <namePart type="given">Lekshmi</namePart>
      <namePart type="family">K R</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jithesh</namePart>
      <namePart type="family">V S</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Elizabeth</namePart>
      <namePart type="family">Sherly</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <!-- Issue date of the paper (year-month). -->
    <originInfo>
      <dateIssued>2020-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Language given both as text and as an ISO 639-2/B code. -->
    <language>
      <languageTerm type="text">English</languageTerm>
      <languageTerm type="code" authority="iso639-2b">eng</languageTerm>
    </language>
    <!-- Host item: the proceedings volume, with its editors, publisher, place and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the WILDRE5– 5th Workshop on Indian Language Data: Resources and Evaluation</title>
      </titleInfo>
      <!-- Personal names with the "editor" role, in listed order. -->
      <name type="personal">
        <namePart type="given">Girish</namePart>
        <namePart type="given">Nath</namePart>
        <namePart type="family">Jha</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Kalika</namePart>
        <namePart type="family">Bali</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sobha</namePart>
        <namePart type="family">L.</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">S</namePart>
        <namePart type="given">S</namePart>
        <namePart type="family">Agrawal</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Atul</namePart>
        <namePart type="given">Kr.</namePart>
        <namePart type="family">Ojha</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>European Language Resources Association (ELRA)</publisher>
        <place>
          <placeTerm type="text">Marseille, France</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-10-95546-67-2</identifier>
    </relatedItem>
    <!-- Abstract text, kept on one line exactly as exported. -->
    <abstract>To overpass the disparity between theory and applications in language-related technology in the text as well as speech and several other areas, a well-designed and well-developed corpus is essential. Several problems and issues encountered while developing a corpus, especially for low resource languages. The Malayalam Speech Corpus (MSC) is one of the first open speech corpora for Automatic Speech Recognition (ASR) research to the best of our knowledge. It consists of 250 hours of Agricultural speech data. We are providing a transcription file, lexicon and annotated speech along with the audio segment. It is available in future for public use upon request at “www.iiitmk.ac.in/vrclc/utilities/ml_speechcorpus”. This paper details the development and collection process in the domain of agricultural speech corpora in the Malayalam Language.</abstract>
    <!-- Citation key; same value as the mods ID attribute above. -->
    <identifier type="citekey">k-r-etal-2020-malayalam</identifier>
    <location>
      <url>https://aclanthology.org/2020.wildre-1.5</url>
    </location>
    <!-- Position inside the host item: date plus first and last page. -->
    <part>
      <date>2020-05</date>
      <extent unit="page">
        <start>25</start>
        <end>28</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Malayalam Speech Corpus: Design and Development for Dravidian Language
%A K R, Lekshmi
%A V S, Jithesh
%A Sherly, Elizabeth
%Y Jha, Girish Nath
%Y Bali, Kalika
%Y L., Sobha
%Y Agrawal, S. S.
%Y Ojha, Atul Kr.
%S Proceedings of the WILDRE5– 5th Workshop on Indian Language Data: Resources and Evaluation
%D 2020
%8 May
%I European Language Resources Association (ELRA)
%C Marseille, France
%@ 979-10-95546-67-2
%G English
%F k-r-etal-2020-malayalam
%X To overpass the disparity between theory and applications in language-related technology in the text as well as speech and several other areas, a well-designed and well-developed corpus is essential. Several problems and issues encountered while developing a corpus, especially for low resource languages. The Malayalam Speech Corpus (MSC) is one of the first open speech corpora for Automatic Speech Recognition (ASR) research to the best of our knowledge. It consists of 250 hours of Agricultural speech data. We are providing a transcription file, lexicon and annotated speech along with the audio segment. It is available in future for public use upon request at “www.iiitmk.ac.in/vrclc/utilities/ml_speechcorpus”. This paper details the development and collection process in the domain of agricultural speech corpora in the Malayalam Language.
%U https://aclanthology.org/2020.wildre-1.5
%P 25-28
Markdown (Informal)
[Malayalam Speech Corpus: Design and Development for Dravidian Language](https://aclanthology.org/2020.wildre-1.5) (K R et al., WILDRE 2020)
ACL