@inproceedings{abera-hailemariam-2020-tigrinya,
title = "{T}igrinya Automatic Speech recognition with Morpheme based recognition units",
author = "Abera, Hafte and
Hailemariam, Sebsibe",
editor = "Cunha, Rossana and
Shaikh, Samira and
Varis, Erika and
Georgi, Ryan and
Tsai, Alicia and
Anastasopoulos, Antonios and
Chandu, Khyathi Raghavi",
booktitle = "Proceedings of the Fourth Widening Natural Language Processing Workshop",
month = jul,
year = "2020",
address = "Seattle, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.winlp-1.12",
doi = "10.18653/v1/2020.winlp-1.12",
pages = "46--50",
abstract = "The Tigrinya language is agglutinative and has a large number of inflected and derived forms of words. Therefore a Tigrinya large vocabulary continuous speech recognition system often has a large number of different units and a high out-of-vocabulary (OOV) rate if a word is used as a recognition unit of a language model (LM) and lexicon. Therefore a morpheme-based approach has often been used and a morpheme is used as the recognition unit to reduce the high OOV rate. This paper presents an automatic speech recognition experiment conducted to see the effect of OOV words on the performance speech recognition system for Tigrinya. We tried to solve the OOV problem by using morphemes as lexicon and language model units. It has been found that the morpheme-based recognition system is better lexical and language modeling units than words. An absolute improvement (in word recognition accuracy) of 3.45 token and 8.36 types has been obtained as a result of using a morph-based vocabulary.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="abera-hailemariam-2020-tigrinya">
<titleInfo>
<title>Tigrinya Automatic Speech recognition with Morpheme based recognition units</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hafte</namePart>
<namePart type="family">Abera</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebsibe</namePart>
<namePart type="family">Hailemariam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fourth Widening Natural Language Processing Workshop</title>
</titleInfo>
<name type="personal">
<namePart type="given">Rossana</namePart>
<namePart type="family">Cunha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Samira</namePart>
<namePart type="family">Shaikh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Erika</namePart>
<namePart type="family">Varis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ryan</namePart>
<namePart type="family">Georgi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alicia</namePart>
<namePart type="family">Tsai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Antonios</namePart>
<namePart type="family">Anastasopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khyathi</namePart>
<namePart type="given">Raghavi</namePart>
<namePart type="family">Chandu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Seattle, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The Tigrinya language is agglutinative and has a large number of inflected and derived forms of words. Therefore a Tigrinya large vocabulary continuous speech recognition system often has a large number of different units and a high out-of-vocabulary (OOV) rate if a word is used as a recognition unit of a language model (LM) and lexicon. Therefore a morpheme-based approach has often been used and a morpheme is used as the recognition unit to reduce the high OOV rate. This paper presents an automatic speech recognition experiment conducted to see the effect of OOV words on the performance speech recognition system for Tigrinya. We tried to solve the OOV problem by using morphemes as lexicon and language model units. It has been found that the morpheme-based recognition system is better lexical and language modeling units than words. An absolute improvement (in word recognition accuracy) of 3.45 token and 8.36 types has been obtained as a result of using a morph-based vocabulary.</abstract>
<identifier type="citekey">abera-hailemariam-2020-tigrinya</identifier>
<identifier type="doi">10.18653/v1/2020.winlp-1.12</identifier>
<location>
<url>https://aclanthology.org/2020.winlp-1.12</url>
</location>
<part>
<date>2020-07</date>
<extent unit="page">
<start>46</start>
<end>50</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Tigrinya Automatic Speech recognition with Morpheme based recognition units
%A Abera, Hafte
%A Hailemariam, Sebsibe
%Y Cunha, Rossana
%Y Shaikh, Samira
%Y Varis, Erika
%Y Georgi, Ryan
%Y Tsai, Alicia
%Y Anastasopoulos, Antonios
%Y Chandu, Khyathi Raghavi
%S Proceedings of the Fourth Widening Natural Language Processing Workshop
%D 2020
%8 July
%I Association for Computational Linguistics
%C Seattle, USA
%F abera-hailemariam-2020-tigrinya
%X The Tigrinya language is agglutinative and has a large number of inflected and derived forms of words. Therefore a Tigrinya large vocabulary continuous speech recognition system often has a large number of different units and a high out-of-vocabulary (OOV) rate if a word is used as a recognition unit of a language model (LM) and lexicon. Therefore a morpheme-based approach has often been used and a morpheme is used as the recognition unit to reduce the high OOV rate. This paper presents an automatic speech recognition experiment conducted to see the effect of OOV words on the performance speech recognition system for Tigrinya. We tried to solve the OOV problem by using morphemes as lexicon and language model units. It has been found that the morpheme-based recognition system is better lexical and language modeling units than words. An absolute improvement (in word recognition accuracy) of 3.45 token and 8.36 types has been obtained as a result of using a morph-based vocabulary.
%R 10.18653/v1/2020.winlp-1.12
%U https://aclanthology.org/2020.winlp-1.12
%U https://doi.org/10.18653/v1/2020.winlp-1.12
%P 46-50
Markdown (Informal)
[Tigrinya Automatic Speech recognition with Morpheme based recognition units](https://aclanthology.org/2020.winlp-1.12) (Abera & Hailemariam, WiNLP 2020)
ACL