@inproceedings{aksenova-etal-2022-rudsi,
title = "{R}u{DSI}: Graph-based Word Sense Induction Dataset for {R}ussian",
author = "Aksenova, Anna and
Gavrishina, Ekaterina and
Rykov, Elisei and
Kutuzov, Andrey",
editor = "Ustalov, Dmitry and
Gao, Yanjun and
Panchenko, Alexander and
Valentino, Marco and
Thayaparan, Mokanarangan and
Nguyen, Thien Huu and
Penn, Gerald and
Ramesh, Arti and
Jana, Abhik",
booktitle = "Proceedings of TextGraphs-16: Graph-based Methods for Natural Language Processing",
month = oct,
year = "2022",
address = "Gyeongju, Republic of Korea",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.textgraphs-1.9",
pages = "77--88",
abstract = "We present RuDSI, a new benchmark for word sense induction (WSI) in Russian. The dataset was created using manual annotation and semi-automatic clustering of Word Usage Graphs (WUGs). RuDSI is completely data-driven (based on texts from Russian National Corpus), with no external word senses imposed on annotators. We present and analyze RuDSI, describe our annotation workflow, show how graph clustering parameters affect the dataset, report the performance that several baseline WSI methods obtain on RuDSI and discuss possibilities for improving these scores.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="aksenova-etal-2022-rudsi">
<titleInfo>
<title>RuDSI: Graph-based Word Sense Induction Dataset for Russian</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Aksenova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Gavrishina</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elisei</namePart>
<namePart type="family">Rykov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andrey</namePart>
<namePart type="family">Kutuzov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-10</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of TextGraphs-16: Graph-based Methods for Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dmitry</namePart>
<namePart type="family">Ustalov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yanjun</namePart>
<namePart type="family">Gao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexander</namePart>
<namePart type="family">Panchenko</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marco</namePart>
<namePart type="family">Valentino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mokanarangan</namePart>
<namePart type="family">Thayaparan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thien</namePart>
<namePart type="given">Huu</namePart>
<namePart type="family">Nguyen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gerald</namePart>
<namePart type="family">Penn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arti</namePart>
<namePart type="family">Ramesh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abhik</namePart>
<namePart type="family">Jana</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Gyeongju, Republic of Korea</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present RuDSI, a new benchmark for word sense induction (WSI) in Russian. The dataset was created using manual annotation and semi-automatic clustering of Word Usage Graphs (WUGs). RuDSI is completely data-driven (based on texts from Russian National Corpus), with no external word senses imposed on annotators. We present and analyze RuDSI, describe our annotation workflow, show how graph clustering parameters affect the dataset, report the performance that several baseline WSI methods obtain on RuDSI and discuss possibilities for improving these scores.</abstract>
<identifier type="citekey">aksenova-etal-2022-rudsi</identifier>
<location>
<url>https://aclanthology.org/2022.textgraphs-1.9</url>
</location>
<part>
<date>2022-10</date>
<extent unit="page">
<start>77</start>
<end>88</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T RuDSI: Graph-based Word Sense Induction Dataset for Russian
%A Aksenova, Anna
%A Gavrishina, Ekaterina
%A Rykov, Elisei
%A Kutuzov, Andrey
%Y Ustalov, Dmitry
%Y Gao, Yanjun
%Y Panchenko, Alexander
%Y Valentino, Marco
%Y Thayaparan, Mokanarangan
%Y Nguyen, Thien Huu
%Y Penn, Gerald
%Y Ramesh, Arti
%Y Jana, Abhik
%S Proceedings of TextGraphs-16: Graph-based Methods for Natural Language Processing
%D 2022
%8 October
%I Association for Computational Linguistics
%C Gyeongju, Republic of Korea
%F aksenova-etal-2022-rudsi
%X We present RuDSI, a new benchmark for word sense induction (WSI) in Russian. The dataset was created using manual annotation and semi-automatic clustering of Word Usage Graphs (WUGs). RuDSI is completely data-driven (based on texts from Russian National Corpus), with no external word senses imposed on annotators. We present and analyze RuDSI, describe our annotation workflow, show how graph clustering parameters affect the dataset, report the performance that several baseline WSI methods obtain on RuDSI and discuss possibilities for improving these scores.
%U https://aclanthology.org/2022.textgraphs-1.9
%P 77-88
Markdown (Informal)
[RuDSI: Graph-based Word Sense Induction Dataset for Russian](https://aclanthology.org/2022.textgraphs-1.9) (Aksenova et al., TextGraphs 2022)
ACL
- Anna Aksenova, Ekaterina Gavrishina, Elisei Rykov, and Andrey Kutuzov. 2022. RuDSI: Graph-based Word Sense Induction Dataset for Russian. In Proceedings of TextGraphs-16: Graph-based Methods for Natural Language Processing, pages 77–88, Gyeongju, Republic of Korea. Association for Computational Linguistics.