@inproceedings{arikan-etal-2019-detecting,
title = "Detecting Clitics Related Orthographic Errors in {T}urkish",
author = "Arikan, Ugurcan and
Gungor, Onur and
Uskudarli, Suzan",
editor = "Mitkov, Ruslan and
Angelova, Galia",
booktitle = "Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2019)",
month = sep,
year = "2019",
address = "Varna, Bulgaria",
publisher = "INCOMA Ltd.",
url = "https://aclanthology.org/R19-1009/",
doi = "10.26615/978-954-452-056-4_009",
pages = "71--76",
abstract = "For the spell correction task, vocabulary based methods have been replaced with methods that take morphological and grammar rules into account. However, such tools are fairly immature, and, worse, non-existent for many low resource languages. Checking only if a word is well-formed with respect to the morphological rules of a language may produce false negatives due to the ambiguity resulting from the presence of numerous homophonic words. In this work, we propose an approach to detect and correct the {\textquotedblleft}de/da{\textquotedblright} clitic errors in Turkish text. Our model is a neural sequence tagger trained with a synthetically constructed dataset consisting of positive and negative samples. The model's performance with this dataset is presented according to different word embedding configurations. The model achieved an F1 score of 86.67{\%} on a synthetically constructed dataset. We also compared the model's performance on a manually curated dataset of challenging samples that proved superior to other spelling correctors with 71{\%} accuracy compared to the second-best (Google Docs) with an accuracy of 34{\%}."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="arikan-etal-2019-detecting">
<titleInfo>
<title>Detecting Clitics Related Orthographic Errors in Turkish</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ugurcan</namePart>
<namePart type="family">Arikan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Onur</namePart>
<namePart type="family">Gungor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Suzan</namePart>
<namePart type="family">Uskudarli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2019)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Galia</namePart>
<namePart type="family">Angelova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd.</publisher>
<place>
<placeTerm type="text">Varna, Bulgaria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>For the spell correction task, vocabulary based methods have been replaced with methods that take morphological and grammar rules into account. However, such tools are fairly immature, and, worse, non-existent for many low resource languages. Checking only if a word is well-formed with respect to the morphological rules of a language may produce false negatives due to the ambiguity resulting from the presence of numerous homophonic words. In this work, we propose an approach to detect and correct the “de/da” clitic errors in Turkish text. Our model is a neural sequence tagger trained with a synthetically constructed dataset consisting of positive and negative samples. The model’s performance with this dataset is presented according to different word embedding configurations. The model achieved an F1 score of 86.67% on a synthetically constructed dataset. We also compared the model’s performance on a manually curated dataset of challenging samples that proved superior to other spelling correctors with 71% accuracy compared to the second-best (Google Docs) with an accuracy of 34%.</abstract>
<identifier type="citekey">arikan-etal-2019-detecting</identifier>
<identifier type="doi">10.26615/978-954-452-056-4_009</identifier>
<location>
<url>https://aclanthology.org/R19-1009/</url>
</location>
<part>
<date>2019-09</date>
<extent unit="page">
<start>71</start>
<end>76</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Detecting Clitics Related Orthographic Errors in Turkish
%A Arikan, Ugurcan
%A Gungor, Onur
%A Uskudarli, Suzan
%Y Mitkov, Ruslan
%Y Angelova, Galia
%S Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2019)
%D 2019
%8 September
%I INCOMA Ltd.
%C Varna, Bulgaria
%F arikan-etal-2019-detecting
%X For the spell correction task, vocabulary based methods have been replaced with methods that take morphological and grammar rules into account. However, such tools are fairly immature, and, worse, non-existent for many low resource languages. Checking only if a word is well-formed with respect to the morphological rules of a language may produce false negatives due to the ambiguity resulting from the presence of numerous homophonic words. In this work, we propose an approach to detect and correct the “de/da” clitic errors in Turkish text. Our model is a neural sequence tagger trained with a synthetically constructed dataset consisting of positive and negative samples. The model’s performance with this dataset is presented according to different word embedding configurations. The model achieved an F1 score of 86.67% on a synthetically constructed dataset. We also compared the model’s performance on a manually curated dataset of challenging samples that proved superior to other spelling correctors with 71% accuracy compared to the second-best (Google Docs) with an accuracy of 34%.
%R 10.26615/978-954-452-056-4_009
%U https://aclanthology.org/R19-1009/
%U https://doi.org/10.26615/978-954-452-056-4_009
%P 71-76
Markdown (Informal)
[Detecting Clitics Related Orthographic Errors in Turkish](https://aclanthology.org/R19-1009/) (Arikan et al., RANLP 2019)
ACL