@inproceedings{bahrani-etal-2006-building,
title = "Building and Incorporating Language Models for {P}ersian Continuous Speech Recognition Systems",
author = "Bahrani, M. and
Sameti, H. and
Hafezi, N. and
Movasagh, H.",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Gangemi, Aldo and
Maegaard, Bente and
Mariani, Joseph and
Odijk, Jan and
Tapias, Daniel",
booktitle = "Proceedings of the Fifth International Conference on Language Resources and Evaluation ({LREC}{'}06)",
month = may,
year = "2006",
address = "Genoa, Italy",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2006/pdf/36_pdf.pdf",
abstract = "In this paper building statistical language models for Persian language using a corpus and incorporating them in Persian continuous speech recognition (CSR) system are described. We used Persian Text Corpus for building the language models. First we preprocessed the texts of corpus by correcting the different orthography of words. Also, the number of POS tags was decreased by clustering POS tags manually. Then we extracted word based monogram and POS-based bigram and trigram language models from the corpus. We also present the procedure of incorporating language models in a Persian CSR system. By using the language models 27.4{\%} reduction in word error rate was achieved in the best case.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bahrani-etal-2006-building">
<titleInfo>
<title>Building and Incorporating Language Models for Persian Continuous Speech Recognition Systems</title>
</titleInfo>
<name type="personal">
<namePart type="given">M</namePart>
<namePart type="family">Bahrani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">H</namePart>
<namePart type="family">Sameti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">N</namePart>
<namePart type="family">Hafezi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">H</namePart>
<namePart type="family">Movasagh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2006-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth International Conference on Language Resources and Evaluation (LREC’06)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aldo</namePart>
<namePart type="family">Gangemi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Tapias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Genoa, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper building statistical language models for Persian language using a corpus and incorporating them in Persian continuous speech recognition (CSR) system are described. We used Persian Text Corpus for building the language models. First we preprocessed the texts of corpus by correcting the different orthography of words. Also, the number of POS tags was decreased by clustering POS tags manually. Then we extracted word based monogram and POS-based bigram and trigram language models from the corpus. We also present the procedure of incorporating language models in a Persian CSR system. By using the language models 27.4% reduction in word error rate was achieved in the best case.</abstract>
<identifier type="citekey">bahrani-etal-2006-building</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/36_pdf.pdf</url>
</location>
<part>
<date>2006-05</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Building and Incorporating Language Models for Persian Continuous Speech Recognition Systems
%A Bahrani, M.
%A Sameti, H.
%A Hafezi, N.
%A Movasagh, H.
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Gangemi, Aldo
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Odijk, Jan
%Y Tapias, Daniel
%S Proceedings of the Fifth International Conference on Language Resources and Evaluation (LREC’06)
%D 2006
%8 May
%I European Language Resources Association (ELRA)
%C Genoa, Italy
%F bahrani-etal-2006-building
%X In this paper building statistical language models for Persian language using a corpus and incorporating them in Persian continuous speech recognition (CSR) system are described. We used Persian Text Corpus for building the language models. First we preprocessed the texts of corpus by correcting the different orthography of words. Also, the number of POS tags was decreased by clustering POS tags manually. Then we extracted word based monogram and POS-based bigram and trigram language models from the corpus. We also present the procedure of incorporating language models in a Persian CSR system. By using the language models 27.4% reduction in word error rate was achieved in the best case.
%U http://www.lrec-conf.org/proceedings/lrec2006/pdf/36_pdf.pdf
Markdown (Informal)
[Building and Incorporating Language Models for Persian Continuous Speech Recognition Systems](http://www.lrec-conf.org/proceedings/lrec2006/pdf/36_pdf.pdf) (Bahrani et al., LREC 2006)
ACL