% Conference paper in the RANLP 2017 proceedings; both the bare DOI and its resolver URL are recorded.
@inproceedings{konopik-etal-2017-czech,
  title     = {{Czech} Dataset for Semantic Similarity and Relatedness},
  author    = {Konop{\'\i}k, Miloslav and
               Pra{\v{z}}{\'a}k, Ond{\v{r}}ej and
               Steinberger, David},
  editor    = {Mitkov, Ruslan and
               Angelova, Galia},
  booktitle = {Proceedings of the International Conference Recent Advances in Natural Language Processing, {RANLP} 2017},
  month     = sep,
  year      = 2017,
  address   = {Varna, Bulgaria},
  publisher = {INCOMA Ltd.},
  url       = {https://doi.org/10.26615/978-954-452-049-6_053},
  doi       = {10.26615/978-954-452-049-6_053},
  pages     = {401--406},
  abstract  = {This paper introduces a Czech dataset for semantic similarity and semantic relatedness. The dataset contains word pairs with hand annotated scores that indicate the semantic similarity and semantic relatedness of the words. The dataset contains 953 word pairs compiled from 9 different sources. It contains words and their contexts taken from real text corpora including extra examples when the words are ambiguous. The dataset is annotated by 5 independent annotators. The average Spearman correlation coefficient of the annotation agreement is $r = 0.81$. We provide reference evaluation experiments with several methods for computing semantic similarity and relatedness.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the paper with ID konopik-etal-2017-czech. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="konopik-etal-2017-czech">
    <titleInfo>
      <title>Czech Dataset for Semantic Similarity and Relatedness</title>
    </titleInfo>
    <!-- Authors of the paper, in order. -->
    <name type="personal">
      <namePart type="given">Miloslav</namePart>
      <namePart type="family">Konopík</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ondřej</namePart>
      <namePart type="family">Pražák</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">David</namePart>
      <namePart type="family">Steinberger</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2017-09</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors and publisher. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the International Conference Recent Advances in Natural Language Processing, RANLP 2017</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Ruslan</namePart>
        <namePart type="family">Mitkov</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Galia</namePart>
        <namePart type="family">Angelova</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>INCOMA Ltd.</publisher>
        <place>
          <placeTerm type="text">Varna, Bulgaria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>This paper introduces a Czech dataset for semantic similarity and semantic relatedness. The dataset contains word pairs with hand annotated scores that indicate the semantic similarity and semantic relatedness of the words. The dataset contains 953 word pairs compiled from 9 different sources. It contains words and their contexts taken from real text corpora including extra examples when the words are ambiguous. The dataset is annotated by 5 independent annotators. The average Spearman correlation coefficient of the annotation agreement is r = 0.81. We provide reference evaluation experiments with several methods for computing semantic similarity and relatedness.</abstract>
    <!-- Identifiers: citation key and DOI. -->
    <identifier type="citekey">konopik-etal-2017-czech</identifier>
    <identifier type="doi">10.26615/978-954-452-049-6_053</identifier>
    <!-- Issue date and page range within the host volume. -->
    <part>
      <date>2017-09</date>
      <extent unit="page">
        <start>401</start>
        <end>406</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Czech Dataset for Semantic Similarity and Relatedness
%A Konopík, Miloslav
%A Pražák, Ondřej
%A Steinberger, David
%Y Mitkov, Ruslan
%Y Angelova, Galia
%S Proceedings of the International Conference Recent Advances in Natural Language Processing, RANLP 2017
%D 2017
%8 September
%I INCOMA Ltd.
%C Varna, Bulgaria
%F konopik-etal-2017-czech
%X This paper introduces a Czech dataset for semantic similarity and semantic relatedness. The dataset contains word pairs with hand annotated scores that indicate the semantic similarity and semantic relatedness of the words. The dataset contains 953 word pairs compiled from 9 different sources. It contains words and their contexts taken from real text corpora including extra examples when the words are ambiguous. The dataset is annotated by 5 independent annotators. The average Spearman correlation coefficient of the annotation agreement is r = 0.81. We provide reference evaluation experiments with several methods for computing semantic similarity and relatedness.
%R 10.26615/978-954-452-049-6_053
%U https://doi.org/10.26615/978-954-452-049-6_053
%P 401-406
Markdown (Informal)
[Czech Dataset for Semantic Similarity and Relatedness](https://doi.org/10.26615/978-954-452-049-6_053) (Konopík et al., RANLP 2017)
ACL
- Miloslav Konopík, Ondřej Pražák, and David Steinberger. 2017. Czech Dataset for Semantic Similarity and Relatedness. In Proceedings of the International Conference Recent Advances in Natural Language Processing, RANLP 2017, pages 401–406, Varna, Bulgaria. INCOMA Ltd.