@inproceedings{fernando-etal-2016-comprehensive,
title = "Comprehensive Part-Of-Speech Tag Set and {SVM} based {POS} Tagger for {S}inhala",
author = "Fernando, Sandareka and
Ranathunga, Surangika and
Jayasena, Sanath and
Dias, Gihan",
editor = "Wu, Dekai and
Bhattacharyya, Pushpak",
booktitle = "Proceedings of the 6th Workshop on South and Southeast {A}sian Natural Language Processing ({WSSANLP}2016)",
month = dec,
year = "2016",
address = "Osaka, Japan",
publisher = "The COLING 2016 Organizing Committee",
url = "https://aclanthology.org/W16-3718",
pages = "173--182",
abstract = "This paper presents a new comprehensive multi-level Part-Of-Speech tag set and a Support Vector Machine based Part-Of-Speech tagger for the Sinhala language. The currently available tag set for Sinhala has two limitations: the unavailability of tags to represent some word classes and the lack of tags to capture inflection based grammatical variations of words. The new tag set, presented in this paper overcomes both of these limitations. The accuracy of available Sinhala Part-Of-Speech taggers, which are based on Hidden Markov Models, still falls far behind state of the art. Our Support Vector Machine based tagger achieved an overall accuracy of 84.68{\%} with 59.86{\%} accuracy for unknown words and 87.12{\%} for known words, when the test set contains 10{\%} of unknown words.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="fernando-etal-2016-comprehensive">
<titleInfo>
<title>Comprehensive Part-Of-Speech Tag Set and SVM based POS Tagger for Sinhala</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sandareka</namePart>
<namePart type="family">Fernando</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Surangika</namePart>
<namePart type="family">Ranathunga</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sanath</namePart>
<namePart type="family">Jayasena</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gihan</namePart>
<namePart type="family">Dias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2016-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 6th Workshop on South and Southeast Asian Natural Language Processing (WSSANLP2016)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dekai</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pushpak</namePart>
<namePart type="family">Bhattacharyya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>The COLING 2016 Organizing Committee</publisher>
<place>
<placeTerm type="text">Osaka, Japan</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper presents a new comprehensive multi-level Part-Of-Speech tag set and a Support Vector Machine based Part-Of-Speech tagger for the Sinhala language. The currently available tag set for Sinhala has two limitations: the unavailability of tags to represent some word classes and the lack of tags to capture inflection based grammatical variations of words. The new tag set, presented in this paper overcomes both of these limitations. The accuracy of available Sinhala Part-Of-Speech taggers, which are based on Hidden Markov Models, still falls far behind state of the art. Our Support Vector Machine based tagger achieved an overall accuracy of 84.68% with 59.86% accuracy for unknown words and 87.12% for known words, when the test set contains 10% of unknown words.</abstract>
<identifier type="citekey">fernando-etal-2016-comprehensive</identifier>
<location>
<url>https://aclanthology.org/W16-3718</url>
</location>
<part>
<date>2016-12</date>
<extent unit="page">
<start>173</start>
<end>182</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Comprehensive Part-Of-Speech Tag Set and SVM based POS Tagger for Sinhala
%A Fernando, Sandareka
%A Ranathunga, Surangika
%A Jayasena, Sanath
%A Dias, Gihan
%Y Wu, Dekai
%Y Bhattacharyya, Pushpak
%S Proceedings of the 6th Workshop on South and Southeast Asian Natural Language Processing (WSSANLP2016)
%D 2016
%8 December
%I The COLING 2016 Organizing Committee
%C Osaka, Japan
%F fernando-etal-2016-comprehensive
%X This paper presents a new comprehensive multi-level Part-Of-Speech tag set and a Support Vector Machine based Part-Of-Speech tagger for the Sinhala language. The currently available tag set for Sinhala has two limitations: the unavailability of tags to represent some word classes and the lack of tags to capture inflection based grammatical variations of words. The new tag set, presented in this paper overcomes both of these limitations. The accuracy of available Sinhala Part-Of-Speech taggers, which are based on Hidden Markov Models, still falls far behind state of the art. Our Support Vector Machine based tagger achieved an overall accuracy of 84.68% with 59.86% accuracy for unknown words and 87.12% for known words, when the test set contains 10% of unknown words.
%U https://aclanthology.org/W16-3718
%P 173-182
Markdown (Informal)
[Comprehensive Part-Of-Speech Tag Set and SVM based POS Tagger for Sinhala](https://aclanthology.org/W16-3718) (Fernando et al., WSSANLP 2016)
ACL