@inproceedings{ezeani-etal-2017-lexical,
title = "Lexical Disambiguation of {I}gbo using Diacritic Restoration",
author = "Ezeani, Ignatius and
Hepple, Mark and
Onyenwe, Ikechukwu",
editor = "Camacho-Collados, Jose and
Pilehvar, Mohammad Taher",
booktitle = "Proceedings of the 1st Workshop on Sense, Concept and Entity Representations and their Applications",
month = apr,
year = "2017",
address = "Valencia, Spain",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W17-1907",
doi = "10.18653/v1/W17-1907",
pages = "53--60",
abstract = "Properly written texts in Igbo, a low-resource African language, are rich in both orthographic and tonal diacritics. Diacritics are essential in capturing the distinctions in pronunciation and meaning of words, as well as in lexical disambiguation. Unfortunately, most electronic texts in diacritic languages are written without diacritics. This makes diacritic restoration a necessary step in corpus building and language processing tasks for languages with diacritics. In our previous work, we built some n-gram models with simple smoothing techniques based on a closed-world assumption. However, as a classification task, diacritic restoration is well suited for and will be more generalisable with machine learning. This paper, therefore, presents a more standard approach to dealing with the task which involves the application of machine learning algorithms.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ezeani-etal-2017-lexical">
<titleInfo>
<title>Lexical Disambiguation of Igbo using Diacritic Restoration</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ignatius</namePart>
<namePart type="family">Ezeani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mark</namePart>
<namePart type="family">Hepple</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ikechukwu</namePart>
<namePart type="family">Onyenwe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st Workshop on Sense, Concept and Entity Representations and their Applications</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jose</namePart>
<namePart type="family">Camacho-Collados</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Taher</namePart>
<namePart type="family">Pilehvar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Valencia, Spain</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Properly written texts in Igbo, a low-resource African language, are rich in both orthographic and tonal diacritics. Diacritics are essential in capturing the distinctions in pronunciation and meaning of words, as well as in lexical disambiguation. Unfortunately, most electronic texts in diacritic languages are written without diacritics. This makes diacritic restoration a necessary step in corpus building and language processing tasks for languages with diacritics. In our previous work, we built some n-gram models with simple smoothing techniques based on a closed-world assumption. However, as a classification task, diacritic restoration is well suited for and will be more generalisable with machine learning. This paper, therefore, presents a more standard approach to dealing with the task which involves the application of machine learning algorithms.</abstract>
<identifier type="citekey">ezeani-etal-2017-lexical</identifier>
<identifier type="doi">10.18653/v1/W17-1907</identifier>
<location>
<url>https://aclanthology.org/W17-1907</url>
</location>
<part>
<date>2017-04</date>
<extent unit="page">
<start>53</start>
<end>60</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Lexical Disambiguation of Igbo using Diacritic Restoration
%A Ezeani, Ignatius
%A Hepple, Mark
%A Onyenwe, Ikechukwu
%Y Camacho-Collados, Jose
%Y Pilehvar, Mohammad Taher
%S Proceedings of the 1st Workshop on Sense, Concept and Entity Representations and their Applications
%D 2017
%8 April
%I Association for Computational Linguistics
%C Valencia, Spain
%F ezeani-etal-2017-lexical
%X Properly written texts in Igbo, a low-resource African language, are rich in both orthographic and tonal diacritics. Diacritics are essential in capturing the distinctions in pronunciation and meaning of words, as well as in lexical disambiguation. Unfortunately, most electronic texts in diacritic languages are written without diacritics. This makes diacritic restoration a necessary step in corpus building and language processing tasks for languages with diacritics. In our previous work, we built some n-gram models with simple smoothing techniques based on a closed-world assumption. However, as a classification task, diacritic restoration is well suited for and will be more generalisable with machine learning. This paper, therefore, presents a more standard approach to dealing with the task which involves the application of machine learning algorithms.
%R 10.18653/v1/W17-1907
%U https://aclanthology.org/W17-1907
%U https://doi.org/10.18653/v1/W17-1907
%P 53-60
Markdown (Informal)
[Lexical Disambiguation of Igbo using Diacritic Restoration](https://aclanthology.org/W17-1907) (Ezeani et al., SENSE 2017)
ACL
- Ignatius Ezeani, Mark Hepple, and Ikechukwu Onyenwe. 2017. Lexical Disambiguation of Igbo using Diacritic Restoration. In Proceedings of the 1st Workshop on Sense, Concept and Entity Representations and their Applications, pages 53–60, Valencia, Spain. Association for Computational Linguistics.