@inproceedings{kadupitiya-etal-2016-sinhala,
title = "{S}inhala Short Sentence Similarity Calculation using Corpus-Based and Knowledge-Based Similarity Measures",
author = "Kadupitiya, Jcs and
Ranathunga, Surangika and
Dias, Gihan",
editor = "Wu, Dekai and
Bhattacharyya, Pushpak",
booktitle = "Proceedings of the 6th Workshop on South and Southeast {A}sian Natural Language Processing ({WSSANLP}2016)",
month = dec,
year = "2016",
address = "Osaka, Japan",
publisher = "The COLING 2016 Organizing Committee",
url = "https://aclanthology.org/W16-3705/",
pages = "44--53",
abstract = "Currently, corpus-based similarity, string-based similarity, and knowledge-based similarity techniques are used to compare short phrases. However, no work has been conducted on the similarity of phrases in Sinhala language. In this paper, we present a hybrid methodology to compute the similarity between two Sinhala sentences using a Semantic Similarity Measurement technique (corpus-based similarity measurement plus knowledge-based similarity measurement) that makes use of word order information. Since Sinhala WordNet is still under construction, we used lexical resources in performing this semantic similarity calculation. Evaluation using 4000 sentence pairs yielded an average MSE of 0.145 and a Pearson correlation factor of 0.832."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kadupitiya-etal-2016-sinhala">
<titleInfo>
<title>Sinhala Short Sentence Similarity Calculation using Corpus-Based and Knowledge-Based Similarity Measures</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jcs</namePart>
<namePart type="family">Kadupitiya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Surangika</namePart>
<namePart type="family">Ranathunga</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gihan</namePart>
<namePart type="family">Dias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2016-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 6th Workshop on South and Southeast Asian Natural Language Processing (WSSANLP2016)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dekai</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pushpak</namePart>
<namePart type="family">Bhattacharyya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>The COLING 2016 Organizing Committee</publisher>
<place>
<placeTerm type="text">Osaka, Japan</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Currently, corpus-based similarity, string-based similarity, and knowledge-based similarity techniques are used to compare short phrases. However, no work has been conducted on the similarity of phrases in Sinhala language. In this paper, we present a hybrid methodology to compute the similarity between two Sinhala sentences using a Semantic Similarity Measurement technique (corpus-based similarity measurement plus knowledge-based similarity measurement) that makes use of word order information. Since Sinhala WordNet is still under construction, we used lexical resources in performing this semantic similarity calculation. Evaluation using 4000 sentence pairs yielded an average MSE of 0.145 and a Pearson correlation factor of 0.832.</abstract>
<identifier type="citekey">kadupitiya-etal-2016-sinhala</identifier>
<location>
<url>https://aclanthology.org/W16-3705/</url>
</location>
<part>
<date>2016-12</date>
<extent unit="page">
<start>44</start>
<end>53</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Sinhala Short Sentence Similarity Calculation using Corpus-Based and Knowledge-Based Similarity Measures
%A Kadupitiya, Jcs
%A Ranathunga, Surangika
%A Dias, Gihan
%Y Wu, Dekai
%Y Bhattacharyya, Pushpak
%S Proceedings of the 6th Workshop on South and Southeast Asian Natural Language Processing (WSSANLP2016)
%D 2016
%8 December
%I The COLING 2016 Organizing Committee
%C Osaka, Japan
%F kadupitiya-etal-2016-sinhala
%X Currently, corpus-based similarity, string-based similarity, and knowledge-based similarity techniques are used to compare short phrases. However, no work has been conducted on the similarity of phrases in Sinhala language. In this paper, we present a hybrid methodology to compute the similarity between two Sinhala sentences using a Semantic Similarity Measurement technique (corpus-based similarity measurement plus knowledge-based similarity measurement) that makes use of word order information. Since Sinhala WordNet is still under construction, we used lexical resources in performing this semantic similarity calculation. Evaluation using 4000 sentence pairs yielded an average MSE of 0.145 and a Pearson correlation factor of 0.832.
%U https://aclanthology.org/W16-3705/
%P 44-53
Markdown (Informal)
[Sinhala Short Sentence Similarity Calculation using Corpus-Based and Knowledge-Based Similarity Measures](https://aclanthology.org/W16-3705/) (Kadupitiya et al., WSSANLP 2016)
ACL