@inproceedings{nesterov-etal-2022-ruccon,
title = "{R}u{CC}o{N}: Clinical Concept Normalization in {R}ussian",
author = "Nesterov, Alexandr and
Zubkova, Galina and
Miftahutdinov, Zulfat and
Kokh, Vladimir and
Tutubalina, Elena and
Shelmanov, Artem and
Alekseev, Anton and
Avetisian, Manvel and
Chertok, Andrey and
Nikolenko, Sergey",
editor = "Muresan, Smaranda and
Nakov, Preslav and
Villavicencio, Aline",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2022",
month = may,
year = "2022",
address = "Dublin, Ireland",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.findings-acl.21/",
doi = "10.18653/v1/2022.findings-acl.21",
pages = "239--245",
abstract = "We present RuCCoN, a new dataset for clinical concept normalization in Russian manually annotated by medical professionals. It contains over 16,028 entity mentions manually linked to over 2,409 unique concepts from the Russian language part of the UMLS ontology. We provide train/test splits for different settings (stratified, zero-shot, and CUI-less) and present strong baselines obtained with state-of-the-art models such as SapBERT. At present, Russian medical NLP is lacking in both datasets and trained models, and we view this work as an important step towards filling this gap. Our dataset and annotation guidelines are available at \url{https://github.com/sberbank-ai-lab/RuCCoN}."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="nesterov-etal-2022-ruccon">
<titleInfo>
<title>RuCCoN: Clinical Concept Normalization in Russian</title>
</titleInfo>
<name type="personal">
<namePart type="given">Alexandr</namePart>
<namePart type="family">Nesterov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Galina</namePart>
<namePart type="family">Zubkova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zulfat</namePart>
<namePart type="family">Miftahutdinov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vladimir</namePart>
<namePart type="family">Kokh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elena</namePart>
<namePart type="family">Tutubalina</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Artem</namePart>
<namePart type="family">Shelmanov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anton</namePart>
<namePart type="family">Alekseev</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Manvel</namePart>
<namePart type="family">Avetisian</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andrey</namePart>
<namePart type="family">Chertok</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sergey</namePart>
<namePart type="family">Nikolenko</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2022</title>
</titleInfo>
<name type="personal">
<namePart type="given">Smaranda</namePart>
<namePart type="family">Muresan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Preslav</namePart>
<namePart type="family">Nakov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aline</namePart>
<namePart type="family">Villavicencio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dublin, Ireland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present RuCCoN, a new dataset for clinical concept normalization in Russian manually annotated by medical professionals. It contains over 16,028 entity mentions manually linked to over 2,409 unique concepts from the Russian language part of the UMLS ontology. We provide train/test splits for different settings (stratified, zero-shot, and CUI-less) and present strong baselines obtained with state-of-the-art models such as SapBERT. At present, Russian medical NLP is lacking in both datasets and trained models, and we view this work as an important step towards filling this gap. Our dataset and annotation guidelines are available at https://github.com/sberbank-ai-lab/RuCCoN.</abstract>
<identifier type="citekey">nesterov-etal-2022-ruccon</identifier>
<identifier type="doi">10.18653/v1/2022.findings-acl.21</identifier>
<location>
<url>https://aclanthology.org/2022.findings-acl.21/</url>
</location>
<part>
<date>2022-05</date>
<extent unit="page">
<start>239</start>
<end>245</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T RuCCoN: Clinical Concept Normalization in Russian
%A Nesterov, Alexandr
%A Zubkova, Galina
%A Miftahutdinov, Zulfat
%A Kokh, Vladimir
%A Tutubalina, Elena
%A Shelmanov, Artem
%A Alekseev, Anton
%A Avetisian, Manvel
%A Chertok, Andrey
%A Nikolenko, Sergey
%Y Muresan, Smaranda
%Y Nakov, Preslav
%Y Villavicencio, Aline
%S Findings of the Association for Computational Linguistics: ACL 2022
%D 2022
%8 May
%I Association for Computational Linguistics
%C Dublin, Ireland
%F nesterov-etal-2022-ruccon
%X We present RuCCoN, a new dataset for clinical concept normalization in Russian manually annotated by medical professionals. It contains over 16,028 entity mentions manually linked to over 2,409 unique concepts from the Russian language part of the UMLS ontology. We provide train/test splits for different settings (stratified, zero-shot, and CUI-less) and present strong baselines obtained with state-of-the-art models such as SapBERT. At present, Russian medical NLP is lacking in both datasets and trained models, and we view this work as an important step towards filling this gap. Our dataset and annotation guidelines are available at https://github.com/sberbank-ai-lab/RuCCoN.
%R 10.18653/v1/2022.findings-acl.21
%U https://aclanthology.org/2022.findings-acl.21/
%U https://doi.org/10.18653/v1/2022.findings-acl.21
%P 239-245
Markdown (Informal)
[RuCCoN: Clinical Concept Normalization in Russian](https://aclanthology.org/2022.findings-acl.21/) (Nesterov et al., Findings 2022)
ACL
- Alexandr Nesterov, Galina Zubkova, Zulfat Miftahutdinov, Vladimir Kokh, Elena Tutubalina, Artem Shelmanov, Anton Alekseev, Manvel Avetisian, Andrey Chertok, and Sergey Nikolenko. 2022. RuCCoN: Clinical Concept Normalization in Russian. In Findings of the Association for Computational Linguistics: ACL 2022, pages 239–245, Dublin, Ireland. Association for Computational Linguistics.