% Ezeani et al. (2018): paper in the NAACL 2018 Student Research Workshop proceedings (ACL Anthology ID N18-4008).
% Case-sensitive proper nouns in title/booktitle are protected as whole braced words.
% NOTE(review): "address" appears to hold the conference venue rather than the publisher's city -- confirm before normalising.
@inproceedings{ezeani-etal-2018-igbo,
  title     = {{Igbo} Diacritic Restoration using Embedding Models},
  author    = {Ezeani, Ignatius and
               Hepple, Mark and
               Onyenwe, Ikechukwu and
               Chioma, Enemouh},
  editor    = {Cordeiro, Silvio Ricardo and
               Oraby, Shereen and
               Pavalanathan, Umashanthi and
               Rim, Kyeongmin},
  booktitle = {Proceedings of the 2018 Conference of the North {American} Chapter of the Association for Computational Linguistics: Student Research Workshop},
  month     = jun,
  year      = {2018},
  address   = {New Orleans, Louisiana, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/N18-4008},
  doi       = {10.18653/v1/N18-4008},
  pages     = {54--60},
  abstract  = {Igbo is a low-resource language spoken by approximately 30 million people worldwide. It is the native language of the Igbo people of south-eastern Nigeria. In Igbo language, diacritics - orthographic and tonal - play a huge role in the distinguishing the meaning and pronunciation of words. Omitting diacritics in texts often leads to lexical ambiguity. Diacritic restoration is a pre-processing task that replaces missing diacritics on words from which they have been removed. In this work, we applied embedding models to the diacritic restoration task and compared their performances to those of n-gram models. Although word embedding models have been successfully applied to various NLP tasks, it has not been used, to our knowledge, for diacritic restoration. Two classes of word embeddings models were used: those projected from the English embedding space; and those trained with Igbo bible corpus ({\mbox{$\approx$}} 1m). Our best result, 82.49{\%}, is an improvement on the baseline n-gram models.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ezeani-etal-2018-igbo">
<titleInfo>
<title>Igbo Diacritic Restoration using Embedding Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ignatius</namePart>
<namePart type="family">Ezeani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mark</namePart>
<namePart type="family">Hepple</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ikechukwu</namePart>
<namePart type="family">Onyenwe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Enemouh</namePart>
<namePart type="family">Chioma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Student Research Workshop</title>
</titleInfo>
<name type="personal">
<namePart type="given">Silvio</namePart>
<namePart type="given">Ricardo</namePart>
<namePart type="family">Cordeiro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shereen</namePart>
<namePart type="family">Oraby</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Umashanthi</namePart>
<namePart type="family">Pavalanathan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kyeongmin</namePart>
<namePart type="family">Rim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">New Orleans, Louisiana, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Igbo is a low-resource language spoken by approximately 30 million people worldwide. It is the native language of the Igbo people of south-eastern Nigeria. In Igbo language, diacritics - orthographic and tonal - play a huge role in the distinguishing the meaning and pronunciation of words. Omitting diacritics in texts often leads to lexical ambiguity. Diacritic restoration is a pre-processing task that replaces missing diacritics on words from which they have been removed. In this work, we applied embedding models to the diacritic restoration task and compared their performances to those of n-gram models. Although word embedding models have been successfully applied to various NLP tasks, it has not been used, to our knowledge, for diacritic restoration. Two classes of word embeddings models were used: those projected from the English embedding space; and those trained with Igbo bible corpus (≈ 1m). Our best result, 82.49%, is an improvement on the baseline n-gram models.</abstract>
<identifier type="citekey">ezeani-etal-2018-igbo</identifier>
<identifier type="doi">10.18653/v1/N18-4008</identifier>
<location>
<url>https://aclanthology.org/N18-4008</url>
</location>
<part>
<date>2018-06</date>
<extent unit="page">
<start>54</start>
<end>60</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Igbo Diacritic Restoration using Embedding Models
%A Ezeani, Ignatius
%A Hepple, Mark
%A Onyenwe, Ikechukwu
%A Chioma, Enemouh
%Y Cordeiro, Silvio Ricardo
%Y Oraby, Shereen
%Y Pavalanathan, Umashanthi
%Y Rim, Kyeongmin
%S Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Student Research Workshop
%D 2018
%8 June
%I Association for Computational Linguistics
%C New Orleans, Louisiana, USA
%F ezeani-etal-2018-igbo
%X Igbo is a low-resource language spoken by approximately 30 million people worldwide. It is the native language of the Igbo people of south-eastern Nigeria. In Igbo language, diacritics - orthographic and tonal - play a huge role in the distinguishing the meaning and pronunciation of words. Omitting diacritics in texts often leads to lexical ambiguity. Diacritic restoration is a pre-processing task that replaces missing diacritics on words from which they have been removed. In this work, we applied embedding models to the diacritic restoration task and compared their performances to those of n-gram models. Although word embedding models have been successfully applied to various NLP tasks, it has not been used, to our knowledge, for diacritic restoration. Two classes of word embeddings models were used: those projected from the English embedding space; and those trained with Igbo bible corpus (≈ 1m). Our best result, 82.49%, is an improvement on the baseline n-gram models.
%R 10.18653/v1/N18-4008
%U https://aclanthology.org/N18-4008
%U https://doi.org/10.18653/v1/N18-4008
%P 54-60
Markdown (Informal)
[Igbo Diacritic Restoration using Embedding Models](https://aclanthology.org/N18-4008) (Ezeani et al., NAACL 2018)
ACL
- Ignatius Ezeani, Mark Hepple, Ikechukwu Onyenwe, and Enemouh Chioma. 2018. Igbo Diacritic Restoration using Embedding Models. In Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Student Research Workshop, pages 54–60, New Orleans, Louisiana, USA. Association for Computational Linguistics.