% RANLP 2017 conference paper introducing a Czech word-pair dataset for
% semantic similarity and relatedness. The citation key is kept unchanged so
% existing \cite commands continue to resolve.
% Accented letters are brace-wrapped ({\v{z}}, {\'a}, {\v{r}}) so classic
% BibTeX treats each as a single letter when sorting and building labels.
% NOTE(review): the first author's surname may be spelled with an accent
% (Konop{\'i}k) in the published paper -- confirm against the proceedings.
@InProceedings{konopik-pravzak-steinberger:2017:RANLP,
  author    = {Konopik, Miloslav  and  Pra{\v{z}}{\'a}k, Ond{\v{r}}ej  and  Steinberger, David},
  title     = {{Czech} Dataset for Semantic Similarity and Relatedness},
  booktitle = {Proceedings of the International Conference Recent Advances in Natural Language Processing, {RANLP} 2017},
  month     = sep,
  year      = {2017},
  address   = {Varna, Bulgaria},
  publisher = {INCOMA Ltd.},
  pages     = {401--406},
  abstract  = {This paper introduces a Czech dataset for semantic similarity and semantic
	relatedness. The dataset contains word pairs with hand annotated scores that
	indicate the semantic similarity and semantic relatedness of the words. The
	dataset contains 953 word pairs compiled from 9 different sources. It contains
	words and their contexts taken from real text corpora including extra examples
	when the words are ambiguous. The dataset is annotated by 5 independent
	annotators. The average Spearman correlation coefficient of the annotation
	agreement is $r = 0.81$. We provide reference evaluation experiments with
	several methods for computing semantic similarity and relatedness.},
  doi       = {10.26615/978-954-452-049-6_053},
  url       = {https://doi.org/10.26615/978-954-452-049-6_053},
}

