% Workshop paper introducing the MedNorm corpus and embeddings (ACL Anthology W19-3204).
% Braces in title/booktitle protect whole case-sensitive words (MedNorm, SMM4H) from style recasing.
@inproceedings{belousov-etal-2019-mednorm,
  title     = {{MedNorm}: A Corpus and Embeddings for Cross-terminology Medical Concept Normalisation},
  author    = {Belousov, Maksim and
               Dixon, William G. and
               Nenadic, Goran},
  editor    = {Weissenbacher, Davy and
               Gonzalez-Hernandez, Graciela},
  booktitle = {Proceedings of the Fourth Social Media Mining for Health Applications ({\#}{SMM4H}) Workshop {\&} Shared Task},
  month     = aug,
  year      = {2019},
  address   = {Florence, Italy},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/W19-3204},
  doi       = {10.18653/v1/W19-3204},
  pages     = {31--39},
  abstract  = {The medical concept normalisation task aims to map textual descriptions to standard terminologies such as SNOMED-CT or MedDRA. Existing publicly available datasets annotated using different terminologies cannot be simply merged and utilised, and therefore become less valuable when developing machine learning-based concept normalisation systems. To address that, we designed a data harmonisation pipeline and engineered a corpus of 27,979 textual descriptions simultaneously mapped to both MedDRA and SNOMED-CT, sourced from five publicly available datasets across biomedical and social media domains. The pipeline can be used in the future to integrate new datasets into the corpus and also could be applied in relevant data curation tasks. We also described a method to merge different terminologies into a single concept graph preserving their relations and demonstrated that representation learning approach based on random walks on a graph can efficiently encode both hierarchical and equivalent relations and capture semantic similarities not only between concepts inside a given terminology but also between concepts from different terminologies. We believe that making a corpus and embeddings for cross-terminology medical concept normalisation available to the research community would contribute to a better understanding of the task.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS record for citekey belousov-etal-2019-mednorm: a conference paper
     whose host item is the SMM4H workshop proceedings. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="belousov-etal-2019-mednorm">
    <titleInfo>
      <title>MedNorm: A Corpus and Embeddings for Cross-terminology Medical Concept Normalisation</title>
    </titleInfo>
    <!-- Paper authors, in byline order. -->
    <name type="personal">
      <namePart type="given">Maksim</namePart>
      <namePart type="family">Belousov</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">William</namePart>
      <namePart type="given">G</namePart>
      <namePart type="family">Dixon</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Goran</namePart>
      <namePart type="family">Nenadic</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2019-08</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors and publisher. -->
    <relatedItem type="host">
      <titleInfo>
        <!-- The ampersand is written as an entity reference; a bare one makes the document ill-formed. -->
        <title>Proceedings of the Fourth Social Media Mining for Health Applications (#SMM4H) Workshop &amp; Shared Task</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Davy</namePart>
        <namePart type="family">Weissenbacher</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Graciela</namePart>
        <namePart type="family">Gonzalez-Hernandez</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Florence, Italy</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>The medical concept normalisation task aims to map textual descriptions to standard terminologies such as SNOMED-CT or MedDRA. Existing publicly available datasets annotated using different terminologies cannot be simply merged and utilised, and therefore become less valuable when developing machine learning-based concept normalisation systems. To address that, we designed a data harmonisation pipeline and engineered a corpus of 27,979 textual descriptions simultaneously mapped to both MedDRA and SNOMED-CT, sourced from five publicly available datasets across biomedical and social media domains. The pipeline can be used in the future to integrate new datasets into the corpus and also could be applied in relevant data curation tasks. We also described a method to merge different terminologies into a single concept graph preserving their relations and demonstrated that representation learning approach based on random walks on a graph can efficiently encode both hierarchical and equivalent relations and capture semantic similarities not only between concepts inside a given terminology but also between concepts from different terminologies. We believe that making a corpus and embeddings for cross-terminology medical concept normalisation available to the research community would contribute to a better understanding of the task.</abstract>
    <identifier type="citekey">belousov-etal-2019-mednorm</identifier>
    <identifier type="doi">10.18653/v1/W19-3204</identifier>
    <location>
      <url>https://aclanthology.org/W19-3204</url>
    </location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2019-08</date>
      <extent unit="page">
        <start>31</start>
        <end>39</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T MedNorm: A Corpus and Embeddings for Cross-terminology Medical Concept Normalisation
%A Belousov, Maksim
%A Dixon, William G.
%A Nenadic, Goran
%Y Weissenbacher, Davy
%Y Gonzalez-Hernandez, Graciela
%S Proceedings of the Fourth Social Media Mining for Health Applications (#SMM4H) Workshop & Shared Task
%D 2019
%8 August
%I Association for Computational Linguistics
%C Florence, Italy
%F belousov-etal-2019-mednorm
%X The medical concept normalisation task aims to map textual descriptions to standard terminologies such as SNOMED-CT or MedDRA. Existing publicly available datasets annotated using different terminologies cannot be simply merged and utilised, and therefore become less valuable when developing machine learning-based concept normalisation systems. To address that, we designed a data harmonisation pipeline and engineered a corpus of 27,979 textual descriptions simultaneously mapped to both MedDRA and SNOMED-CT, sourced from five publicly available datasets across biomedical and social media domains. The pipeline can be used in the future to integrate new datasets into the corpus and also could be applied in relevant data curation tasks. We also described a method to merge different terminologies into a single concept graph preserving their relations and demonstrated that representation learning approach based on random walks on a graph can efficiently encode both hierarchical and equivalent relations and capture semantic similarities not only between concepts inside a given terminology but also between concepts from different terminologies. We believe that making a corpus and embeddings for cross-terminology medical concept normalisation available to the research community would contribute to a better understanding of the task.
%R 10.18653/v1/W19-3204
%U https://aclanthology.org/W19-3204
%U https://doi.org/10.18653/v1/W19-3204
%P 31-39
Markdown (Informal)
[MedNorm: A Corpus and Embeddings for Cross-terminology Medical Concept Normalisation](https://aclanthology.org/W19-3204) (Belousov et al., ACL 2019)
ACL