@inproceedings{salaev-etal-2022-simreluz,
title = "{S}im{R}el{U}z: Similarity and Relatedness Scores as a Semantic Evaluation Dataset for {U}zbek Language",
author = "Salaev, Ulugbek and
Kuriyozov, Elmurod and
G{\'o}mez-Rodr{\'\i}guez, Carlos",
editor = "Melero, Maite and
Sakti, Sakriani and
Soria, Claudia",
booktitle = "Proceedings of the 1st Annual Meeting of the ELRA/ISCA Special Interest Group on Under-Resourced Languages",
month = jun,
year = "2022",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2022.sigul-1.26",
pages = "199--206",
abstract = "Semantic relatedness between words is one of the core concepts in natural language processing, thus making semantic evaluation an important task. In this paper, we present a semantic model evaluation dataset: SimRelUz - a collection of similarity and relatedness scores of word pairs for the low-resource Uzbek language. The dataset consists of more than a thousand pairs of words carefully selected based on their morphological features, occurrence frequency, semantic relation, as well as annotated by eleven native Uzbek speakers from different age groups and gender. We also paid attention to the problem of dealing with rare words and out-of-vocabulary words to thoroughly evaluate the robustness of semantic models.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="salaev-etal-2022-simreluz">
<titleInfo>
<title>SimRelUz: Similarity and Relatedness Scores as a Semantic Evaluation Dataset for Uzbek Language</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ulugbek</namePart>
<namePart type="family">Salaev</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elmurod</namePart>
<namePart type="family">Kuriyozov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carlos</namePart>
<namePart type="family">Gómez-Rodríguez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st Annual Meeting of the ELRA/ISCA Special Interest Group on Under-Resourced Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maite</namePart>
<namePart type="family">Melero</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Claudia</namePart>
<namePart type="family">Soria</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Semantic relatedness between words is one of the core concepts in natural language processing, thus making semantic evaluation an important task. In this paper, we present a semantic model evaluation dataset: SimRelUz - a collection of similarity and relatedness scores of word pairs for the low-resource Uzbek language. The dataset consists of more than a thousand pairs of words carefully selected based on their morphological features, occurrence frequency, semantic relation, as well as annotated by eleven native Uzbek speakers from different age groups and gender. We also paid attention to the problem of dealing with rare words and out-of-vocabulary words to thoroughly evaluate the robustness of semantic models.</abstract>
<identifier type="citekey">salaev-etal-2022-simreluz</identifier>
<location>
<url>https://aclanthology.org/2022.sigul-1.26</url>
</location>
<part>
<date>2022-06</date>
<extent unit="page">
<start>199</start>
<end>206</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T SimRelUz: Similarity and Relatedness Scores as a Semantic Evaluation Dataset for Uzbek Language
%A Salaev, Ulugbek
%A Kuriyozov, Elmurod
%A Gómez-Rodríguez, Carlos
%Y Melero, Maite
%Y Sakti, Sakriani
%Y Soria, Claudia
%S Proceedings of the 1st Annual Meeting of the ELRA/ISCA Special Interest Group on Under-Resourced Languages
%D 2022
%8 June
%I European Language Resources Association
%C Marseille, France
%F salaev-etal-2022-simreluz
%X Semantic relatedness between words is one of the core concepts in natural language processing, thus making semantic evaluation an important task. In this paper, we present a semantic model evaluation dataset: SimRelUz - a collection of similarity and relatedness scores of word pairs for the low-resource Uzbek language. The dataset consists of more than a thousand pairs of words carefully selected based on their morphological features, occurrence frequency, semantic relation, as well as annotated by eleven native Uzbek speakers from different age groups and gender. We also paid attention to the problem of dealing with rare words and out-of-vocabulary words to thoroughly evaluate the robustness of semantic models.
%U https://aclanthology.org/2022.sigul-1.26
%P 199-206
Markdown (Informal)
[SimRelUz: Similarity and Relatedness Scores as a Semantic Evaluation Dataset for Uzbek Language](https://aclanthology.org/2022.sigul-1.26) (Salaev et al., SIGUL 2022)
ACL