% Conference paper (SIGUL 2022), ACL Anthology record 2022.sigul-1.25.
% Proper nouns and acronyms are brace-protected as whole words so that
% sentence-casing bibliography styles keep their capitalisation.
@inproceedings{abdulrahman-hassani-2022-language,
  title     = {A Language Model for Spell Checking of Educational Texts in {Kurdish} ({Sorani})},
  author    = {Abdulrahman, Roshna and Hassani, Hossein},
  editor    = {Melero, Maite and Sakti, Sakriani and Soria, Claudia},
  booktitle = {Proceedings of the 1st Annual Meeting of the {ELRA/ISCA} Special Interest Group on Under-Resourced Languages},
  month     = jun,
  year      = {2022},
  address   = {Marseille, France},
  publisher = {European Language Resources Association},
  url       = {https://aclanthology.org/2022.sigul-1.25},
  pages     = {189--198},
  abstract  = {Spell checkers are an integrated feature of most software applications handling text inputs. When we write an email or compile a report on a desktop or a smartphone editor, a spell checker could be activated that assists us to write more correctly. However, this assistance does not exist for all languages equally. The Kurdish language, which still is considered a less-resourced language, currently lacks spell checkers for its various dialects. We present a trigram language model for the Sorani dialect of the Kurdish language that is created using educational text. We also showcase a spell checker for the Sorani dialect of Kurdish that can assist in writing texts in the Persian/Arabic script. The spell checker was developed as a testing environment for the language model. Primarily, we use the probabilistic method and our trigram language model with Stupid Backoff smoothing for the spell checking algorithm. Our spell checker has been trained on the KTC (Kurdish Textbook Corpus) dataset. Hence the system aims at assisting spell checking in the related context. We test our approach by developing a text processing environment that checks for spelling errors on a word and context basis. It suggests a list of corrections for misspelled words. The developed spell checker shows 88.54{\%} accuracy on the texts in the related context and it has an F1 score of 43.33{\%}, and the correct suggestion has an 85{\%} chance of being in the top three positions of the corrections.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS collection holding a single bibliographic record. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="abdulrahman-hassani-2022-language">
    <!-- Title of the paper itself. -->
    <titleInfo>
      <title>A Language Model for Spell Checking of Educational Texts in Kurdish (Sorani)</title>
    </titleInfo>
    <!-- Authors, in order. -->
    <name type="personal">
      <namePart type="given">Roshna</namePart>
      <namePart type="family">Abdulrahman</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Hossein</namePart>
      <namePart type="family">Hassani</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2022-06</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 1st Annual Meeting of the ELRA/ISCA Special Interest Group on Under-Resourced Languages</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Maite</namePart>
        <namePart type="family">Melero</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sakriani</namePart>
        <namePart type="family">Sakti</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Claudia</namePart>
        <namePart type="family">Soria</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>European Language Resources Association</publisher>
        <place>
          <placeTerm type="text">Marseille, France</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Spell checkers are an integrated feature of most software applications handling text inputs. When we write an email or compile a report on a desktop or a smartphone editor, a spell checker could be activated that assists us to write more correctly. However, this assistance does not exist for all languages equally. The Kurdish language, which still is considered a less-resourced language, currently lacks spell checkers for its various dialects. We present a trigram language model for the Sorani dialect of the Kurdish language that is created using educational text. We also showcase a spell checker for the Sorani dialect of Kurdish that can assist in writing texts in the Persian/Arabic script. The spell checker was developed as a testing environment for the language model. Primarily, we use the probabilistic method and our trigram language model with Stupid Backoff smoothing for the spell checking algorithm. Our spell checker has been trained on the KTC (Kurdish Textbook Corpus) dataset. Hence the system aims at assisting spell checking in the related context. We test our approach by developing a text processing environment that checks for spelling errors on a word and context basis. It suggests a list of corrections for misspelled words. The developed spell checker shows 88.54% accuracy on the texts in the related context and it has an F1 score of 43.33%, and the correct suggestion has an 85% chance of being in the top three positions of the corrections.</abstract>
    <identifier type="citekey">abdulrahman-hassani-2022-language</identifier>
    <location>
      <url>https://aclanthology.org/2022.sigul-1.25</url>
    </location>
    <!-- Date and page extent of the paper. -->
    <part>
      <date>2022-06</date>
      <extent unit="page">
        <start>189</start>
        <end>198</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T A Language Model for Spell Checking of Educational Texts in Kurdish (Sorani)
%A Abdulrahman, Roshna
%A Hassani, Hossein
%Y Melero, Maite
%Y Sakti, Sakriani
%Y Soria, Claudia
%S Proceedings of the 1st Annual Meeting of the ELRA/ISCA Special Interest Group on Under-Resourced Languages
%D 2022
%8 June
%I European Language Resources Association
%C Marseille, France
%F abdulrahman-hassani-2022-language
%X Spell checkers are an integrated feature of most software applications handling text inputs. When we write an email or compile a report on a desktop or a smartphone editor, a spell checker could be activated that assists us to write more correctly. However, this assistance does not exist for all languages equally. The Kurdish language, which still is considered a less-resourced language, currently lacks spell checkers for its various dialects. We present a trigram language model for the Sorani dialect of the Kurdish language that is created using educational text. We also showcase a spell checker for the Sorani dialect of Kurdish that can assist in writing texts in the Persian/Arabic script. The spell checker was developed as a testing environment for the language model. Primarily, we use the probabilistic method and our trigram language model with Stupid Backoff smoothing for the spell checking algorithm. Our spell checker has been trained on the KTC (Kurdish Textbook Corpus) dataset. Hence the system aims at assisting spell checking in the related context. We test our approach by developing a text processing environment that checks for spelling errors on a word and context basis. It suggests a list of corrections for misspelled words. The developed spell checker shows 88.54% accuracy on the texts in the related context and it has an F1 score of 43.33%, and the correct suggestion has an 85% chance of being in the top three positions of the corrections.
%U https://aclanthology.org/2022.sigul-1.25
%P 189-198
Markdown (Informal)
[A Language Model for Spell Checking of Educational Texts in Kurdish (Sorani)](https://aclanthology.org/2022.sigul-1.25) (Abdulrahman & Hassani, SIGUL 2022)
ACL