@inproceedings{luong-vu-2016-non,
title = "A non-expert {K}aldi recipe for {V}ietnamese Speech Recognition System",
author = "Luong, Hieu-Thi and
Vu, Hai-Quan",
editor = "Murakami, Yohei and
Lin, Donghui and
Ide, Nancy and
Pustejovsky, James",
booktitle = "Proceedings of the Third International Workshop on Worldwide Language Service Infrastructure and Second Workshop on Open Infrastructures and Analysis Frameworks for Human Language Technologies ({WLSI}/{OIAF}4{HLT}2016)",
month = dec,
year = "2016",
address = "Osaka, Japan",
publisher = "The COLING 2016 Organizing Committee",
url = "https://aclanthology.org/W16-5207",
pages = "51--55",
abstract = "In this paper we describe a non-expert setup for Vietnamese speech recognition system using Kaldi toolkit. We collected a speech corpus over fifteen hours from about fifty Vietnamese native speakers and using it to test the feasibility of our setup. The essential linguistic components for the Automatic Speech Recognition (ASR) system was prepared basing on the written form of the language instead of expertise knowledge on linguistic and phonology as commonly seen in rich resource languages like English. The modeling of tones by integrating them into the phoneme and using the phonetic decision tree is also discussed. Experimental results showed this setup for ASR systems does yield competitive results while still have potentials for further improvements.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="luong-vu-2016-non">
<titleInfo>
<title>A non-expert Kaldi recipe for Vietnamese Speech Recognition System</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hieu-Thi</namePart>
<namePart type="family">Luong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hai-Quan</namePart>
<namePart type="family">Vu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2016-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Third International Workshop on Worldwide Language Service Infrastructure and Second Workshop on Open Infrastructures and Analysis Frameworks for Human Language Technologies (WLSI/OIAF4HLT2016)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yohei</namePart>
<namePart type="family">Murakami</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Donghui</namePart>
<namePart type="family">Lin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nancy</namePart>
<namePart type="family">Ide</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">James</namePart>
<namePart type="family">Pustejovsky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>The COLING 2016 Organizing Committee</publisher>
<place>
<placeTerm type="text">Osaka, Japan</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper we describe a non-expert setup for Vietnamese speech recognition system using Kaldi toolkit. We collected a speech corpus over fifteen hours from about fifty Vietnamese native speakers and using it to test the feasibility of our setup. The essential linguistic components for the Automatic Speech Recognition (ASR) system was prepared basing on the written form of the language instead of expertise knowledge on linguistic and phonology as commonly seen in rich resource languages like English. The modeling of tones by integrating them into the phoneme and using the phonetic decision tree is also discussed. Experimental results showed this setup for ASR systems does yield competitive results while still have potentials for further improvements.</abstract>
<identifier type="citekey">luong-vu-2016-non</identifier>
<location>
<url>https://aclanthology.org/W16-5207</url>
</location>
<part>
<date>2016-12</date>
<extent unit="page">
<start>51</start>
<end>55</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A non-expert Kaldi recipe for Vietnamese Speech Recognition System
%A Luong, Hieu-Thi
%A Vu, Hai-Quan
%Y Murakami, Yohei
%Y Lin, Donghui
%Y Ide, Nancy
%Y Pustejovsky, James
%S Proceedings of the Third International Workshop on Worldwide Language Service Infrastructure and Second Workshop on Open Infrastructures and Analysis Frameworks for Human Language Technologies (WLSI/OIAF4HLT2016)
%D 2016
%8 December
%I The COLING 2016 Organizing Committee
%C Osaka, Japan
%F luong-vu-2016-non
%X In this paper we describe a non-expert setup for Vietnamese speech recognition system using Kaldi toolkit. We collected a speech corpus over fifteen hours from about fifty Vietnamese native speakers and using it to test the feasibility of our setup. The essential linguistic components for the Automatic Speech Recognition (ASR) system was prepared basing on the written form of the language instead of expertise knowledge on linguistic and phonology as commonly seen in rich resource languages like English. The modeling of tones by integrating them into the phoneme and using the phonetic decision tree is also discussed. Experimental results showed this setup for ASR systems does yield competitive results while still have potentials for further improvements.
%U https://aclanthology.org/W16-5207
%P 51-55
Markdown (Informal)
[A non-expert Kaldi recipe for Vietnamese Speech Recognition System](https://aclanthology.org/W16-5207) (Luong & Vu, OIAF4HLT 2016)
ACL
- Hieu-Thi Luong and Hai-Quan Vu. 2016. A non-expert Kaldi recipe for Vietnamese Speech Recognition System. In Proceedings of the Third International Workshop on Worldwide Language Service Infrastructure and Second Workshop on Open Infrastructures and Analysis Frameworks for Human Language Technologies (WLSI/OIAF4HLT2016), pages 51–55, Osaka, Japan. The COLING 2016 Organizing Committee.