% Conference paper from the CLIB 2018 proceedings (ACL Anthology ID 2018.clib-1.7).
% Proper nouns and acronyms are brace-protected as whole words so that styles
% applying sentence casing keep their capitalisation.
@inproceedings{krstev-etal-2018-knowledge,
  author    = {Krstev, Cvetana and
               Stankovi{\'c}, Ranka and
               Vitas, Du{\v{s}}ko},
  title     = {Knowledge and Rule-Based Diacritic Restoration in {Serbian}},
  booktitle = {Proceedings of the Third International Conference on Computational Linguistics in {Bulgaria} ({CLIB} 2018)},
  month     = may,
  year      = {2018},
  address   = {Sofia, Bulgaria},
  publisher = {Department of Computational Linguistics, Institute for Bulgarian Language, Bulgarian Academy of Sciences},
  pages     = {41--51},
  url       = {https://aclanthology.org/2018.clib-1.7},
  abstract  = {In this paper we present a procedure for the restoration of diacritics in Serbian texts written using the degraded Latin alphabet. The procedure relies on the comprehensive lexical resources for Serbian: the morphological electronic dictionaries, the Corpus of Contemporary Serbian and local grammars. Dictionaries are used to identify possible candidates for the restoration, while the data obtained from SrpKor and local grammars assists in making a decision between several candidates in cases of ambiguity. The evaluation results reveal that, depending on the text, accuracy ranges from 95.03\% to 99.36\%, while the precision (average 98.93\%) is always higher than the recall (average 94.94\%).},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the conference paper with citekey krstev-etal-2018-knowledge.
     The relatedItem of type "host" describes the proceedings volume it appeared in. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="krstev-etal-2018-knowledge">
    <titleInfo>
      <title>Knowledge and Rule-Based Diacritic Restoration in Serbian</title>
    </titleInfo>
    <!-- Authors, in publication order -->
    <name type="personal">
      <namePart type="given">Cvetana</namePart>
      <namePart type="family">Krstev</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ranka</namePart>
      <namePart type="family">Stanković</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Duško</namePart>
      <namePart type="family">Vitas</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2018-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host publication: the proceedings volume -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Third International Conference on Computational Linguistics in Bulgaria (CLIB 2018)</title>
      </titleInfo>
      <originInfo>
        <publisher>Department of Computational Linguistics, Institute for Bulgarian Language, Bulgarian Academy of Sciences</publisher>
        <place>
          <placeTerm type="text">Sofia, Bulgaria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>In this paper we present a procedure for the restoration of diacritics in Serbian texts written using the degraded Latin alphabet. The procedure relies on the comprehensive lexical resources for Serbian: the morphological electronic dictionaries, the Corpus of Contemporary Serbian and local grammars. Dictionaries are used to identify possible candidates for the restoration, while the data obtained from SrpKor and local grammars assists in making a decision between several candidates in cases of ambiguity. The evaluation results reveal that, depending on the text, accuracy ranges from 95.03% to 99.36%, while the precision (average 98.93%) is always higher than the recall (average 94.94%).</abstract>
    <identifier type="citekey">krstev-etal-2018-knowledge</identifier>
    <location>
      <url>https://aclanthology.org/2018.clib-1.7</url>
    </location>
    <!-- Position of the paper within the host volume -->
    <part>
      <date>2018-05</date>
      <extent unit="page">
        <start>41</start>
        <end>51</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Knowledge and Rule-Based Diacritic Restoration in Serbian
%A Krstev, Cvetana
%A Stanković, Ranka
%A Vitas, Duško
%S Proceedings of the Third International Conference on Computational Linguistics in Bulgaria (CLIB 2018)
%D 2018
%8 May
%I Department of Computational Linguistics, Institute for Bulgarian Language, Bulgarian Academy of Sciences
%C Sofia, Bulgaria
%F krstev-etal-2018-knowledge
%X In this paper we present a procedure for the restoration of diacritics in Serbian texts written using the degraded Latin alphabet. The procedure relies on the comprehensive lexical resources for Serbian: the morphological electronic dictionaries, the Corpus of Contemporary Serbian and local grammars. Dictionaries are used to identify possible candidates for the restoration, while the data obtained from SrpKor and local grammars assists in making a decision between several candidates in cases of ambiguity. The evaluation results reveal that, depending on the text, accuracy ranges from 95.03% to 99.36%, while the precision (average 98.93%) is always higher than the recall (average 94.94%).
%U https://aclanthology.org/2018.clib-1.7
%P 41-51
Markdown (Informal)
[Knowledge and Rule-Based Diacritic Restoration in Serbian](https://aclanthology.org/2018.clib-1.7) (Krstev et al., CLIB 2018)
ACL
- Cvetana Krstev, Ranka Stanković, and Duško Vitas. 2018. Knowledge and Rule-Based Diacritic Restoration in Serbian. In Proceedings of the Third International Conference on Computational Linguistics in Bulgaria (CLIB 2018), pages 41–51, Sofia, Bulgaria. Department of Computational Linguistics, Institute for Bulgarian Language, Bulgarian Academy of Sciences.