% LREC 2012 paper: text and speech database for Hindi and Indian English
% in the mobile-communication domain. Proper nouns in the title are braced
% as whole words so that sentence-casing styles keep their capitals.
@inproceedings{agrawal-etal-2012-development,
  title     = {Development of Text and Speech database for {Hindi} and {Indian} {English} specific to Mobile Communication environment},
  author    = {Agrawal, Shyam and
               Sinha, Shweta and
               Singh, Pooja and
               Olson, Jesper},
  editor    = {Calzolari, Nicoletta and
               Choukri, Khalid and
               Declerck, Thierry and
               Do{\u{g}}an, Mehmet U{\u{g}}ur and
               Maegaard, Bente and
               Mariani, Joseph and
               Moreno, Asuncion and
               Odijk, Jan and
               Piperidis, Stelios},
  booktitle = {Proceedings of the Eighth International Conference on Language Resources and Evaluation ({LREC}'12)},
  month     = may,
  year      = {2012},
  address   = {Istanbul, Turkey},
  publisher = {European Language Resources Association (ELRA)},
  url       = {http://www.lrec-conf.org/proceedings/lrec2012/pdf/1132_Paper.pdf},
  pages     = {3415--3421},
  abstract  = {This paper describes the method and experiences of text and speech data collection in mobile communication in Indian English Hindi. The primary data collection is done in the form of large number of messages as part of Personal communication among natives of Hindi language and Indian speakers of English. To gather the versatility of mobile communication database among Hindi and English, 12 domains were identified for collection of text corpus from speaking population belonging to deferent age groups, sex and dialects. The text obtained in raw form based on slangs and unconventional grammar were cleaned using on language grammar rules and then tagged and expanded to explain context specific meaning of the words. Texts of 1163 participants from Hindi speaking regions and 1405 English users were taken for creating 13 prompt sheets; containing 630 phonetically rich sentences created using a special software. Each prompt sheet was recorded by at least 7 users simultaneously in three channels and recorded by a total of 100 speakers and annotated. The work is a step forward in the direction of development of standards for mobile text and speech data collection for Indian languages. Keywords - Speech data base, Text analysis, mobile communication, Hindi and Indian English Speech, multi-lingual speech processing.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<!-- MODS record for the paper identified as agrawal-etal-2012-development. -->
<mods ID="agrawal-etal-2012-development">
<titleInfo>
<title>Development of Text and Speech database for Hindi and Indian English specific to Mobile Communication environment</title>
</titleInfo>
<!-- The paper's four authors; each name carries the marcrelator role term "author". -->
<name type="personal">
<namePart type="given">Shyam</namePart>
<namePart type="family">Agrawal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shweta</namePart>
<namePart type="family">Sinha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pooja</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jesper</namePart>
<namePart type="family">Olson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<!-- Issue date of the paper, given as year and month. -->
<originInfo>
<dateIssued>2012-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<!-- Host item: the proceedings volume containing the paper, with its editors, publisher and place. -->
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC’12)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<!-- This editor has two given-name parts (Mehmet, Uğur). -->
<name type="personal">
<namePart type="given">Mehmet</namePart>
<namePart type="given">Uğur</namePart>
<namePart type="family">Doğan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Istanbul, Turkey</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<!-- Abstract of the paper. -->
<abstract>This paper describes the method and experiences of text and speech data collection in mobile communication in Indian English Hindi. The primary data collection is done in the form of large number of messages as part of Personal communication among natives of Hindi language and Indian speakers of English. To gather the versatility of mobile communication database among Hindi and English, 12 domains were identified for collection of text corpus from speaking population belonging to deferent age groups, sex and dialects. The text obtained in raw form based on slangs and unconventional grammar were cleaned using on language grammar rules and then tagged and expanded to explain context specific meaning of the words. Texts of 1163 participants from Hindi speaking regions and 1405 English users were taken for creating 13 prompt sheets; containing 630 phonetically rich sentences created using a special software. Each prompt sheet was recorded by at least 7 users simultaneously in three channels and recorded by a total of 100 speakers and annotated. The work is a step forward in the direction of development of standards for mobile text and speech data collection for Indian languages. Keywords - Speech data base, Text analysis, mobile communication, Hindi and Indian English Speech, multi-lingual speech processing.</abstract>
<!-- Citation key; same value as the ID attribute on the enclosing mods element. -->
<identifier type="citekey">agrawal-etal-2012-development</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/1132_Paper.pdf</url>
</location>
<!-- Position within the host item: date plus first and last page. -->
<part>
<date>2012-05</date>
<extent unit="page">
<start>3415</start>
<end>3421</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Development of Text and Speech database for Hindi and Indian English specific to Mobile Communication environment
%A Agrawal, Shyam
%A Sinha, Shweta
%A Singh, Pooja
%A Olson, Jesper
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Declerck, Thierry
%Y Doğan, Mehmet Uğur
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC’12)
%D 2012
%8 May
%I European Language Resources Association (ELRA)
%C Istanbul, Turkey
%F agrawal-etal-2012-development
%X This paper describes the method and experiences of text and speech data collection in mobile communication in Indian English Hindi. The primary data collection is done in the form of large number of messages as part of Personal communication among natives of Hindi language and Indian speakers of English. To gather the versatility of mobile communication database among Hindi and English, 12 domains were identified for collection of text corpus from speaking population belonging to deferent age groups, sex and dialects. The text obtained in raw form based on slangs and unconventional grammar were cleaned using on language grammar rules and then tagged and expanded to explain context specific meaning of the words. Texts of 1163 participants from Hindi speaking regions and 1405 English users were taken for creating 13 prompt sheets; containing 630 phonetically rich sentences created using a special software. Each prompt sheet was recorded by at least 7 users simultaneously in three channels and recorded by a total of 100 speakers and annotated. The work is a step forward in the direction of development of standards for mobile text and speech data collection for Indian languages. Keywords - Speech data base, Text analysis, mobile communication, Hindi and Indian English Speech, multi-lingual speech processing.
%U http://www.lrec-conf.org/proceedings/lrec2012/pdf/1132_Paper.pdf
%P 3415-3421
Markdown (Informal)
[Development of Text and Speech database for Hindi and Indian English specific to Mobile Communication environment](http://www.lrec-conf.org/proceedings/lrec2012/pdf/1132_Paper.pdf) (Agrawal et al., LREC 2012)
ACL