@inproceedings{bajaj-etal-2022-evaluating,
title = "Evaluating Biomedical Word Embeddings for Vocabulary Alignment at Scale in the {UMLS} {M}etathesaurus Using {S}iamese Networks",
author = "Bajaj, Goonmeet and
Nguyen, Vinh and
Wijesiriwardene, Thilini and
Yip, Hong Yung and
Javangula, Vishesh and
Sheth, Amit and
Parthasarathy, Srinivasan and
Bodenreider, Olivier",
editor = "Tafreshi, Shabnam and
Sedoc, Jo{\~a}o and
Rogers, Anna and
Drozd, Aleksandr and
Rumshisky, Anna and
Akula, Arjun",
booktitle = "Proceedings of the Third Workshop on Insights from Negative Results in NLP",
month = may,
year = "2022",
address = "Dublin, Ireland",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.insights-1.11",
doi = "10.18653/v1/2022.insights-1.11",
pages = "82--87",
abstract = "Recent work uses a Siamese Network, initialized with BioWordVec embeddings (distributed word embeddings), for predicting synonymy among biomedical terms to automate a part of the UMLS (Unified Medical Language System) Metathesaurus construction process. We evaluate the use of contextualized word embeddings extracted from nine different biomedical BERT-based models for synonym prediction in the UMLS by replacing BioWordVec embeddings with embeddings extracted from each biomedical BERT model using different feature extraction methods. Finally, we conduct a thorough grid search, which prior work lacks, to find the best set of hyperparameters. Surprisingly, we find that Siamese Networks initialized with BioWordVec embeddings still outperform the Siamese Networks initialized with embeddings extracted from biomedical BERT models.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bajaj-etal-2022-evaluating">
<titleInfo>
<title>Evaluating Biomedical Word Embeddings for Vocabulary Alignment at Scale in the UMLS Metathesaurus Using Siamese Networks</title>
</titleInfo>
<name type="personal">
<namePart type="given">Goonmeet</namePart>
<namePart type="family">Bajaj</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vinh</namePart>
<namePart type="family">Nguyen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thilini</namePart>
<namePart type="family">Wijesiriwardene</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hong</namePart>
<namePart type="given">Yung</namePart>
<namePart type="family">Yip</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vishesh</namePart>
<namePart type="family">Javangula</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amit</namePart>
<namePart type="family">Sheth</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Srinivasan</namePart>
<namePart type="family">Parthasarathy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Olivier</namePart>
<namePart type="family">Bodenreider</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Third Workshop on Insights from Negative Results in NLP</title>
</titleInfo>
<name type="personal">
<namePart type="given">Shabnam</namePart>
<namePart type="family">Tafreshi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">João</namePart>
<namePart type="family">Sedoc</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Rogers</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aleksandr</namePart>
<namePart type="family">Drozd</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Rumshisky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arjun</namePart>
<namePart type="family">Akula</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dublin, Ireland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Recent work uses a Siamese Network, initialized with BioWordVec embeddings (distributed word embeddings), for predicting synonymy among biomedical terms to automate a part of the UMLS (Unified Medical Language System) Metathesaurus construction process. We evaluate the use of contextualized word embeddings extracted from nine different biomedical BERT-based models for synonym prediction in the UMLS by replacing BioWordVec embeddings with embeddings extracted from each biomedical BERT model using different feature extraction methods. Finally, we conduct a thorough grid search, which prior work lacks, to find the best set of hyperparameters. Surprisingly, we find that Siamese Networks initialized with BioWordVec embeddings still outperform the Siamese Networks initialized with embeddings extracted from biomedical BERT models.</abstract>
<identifier type="citekey">bajaj-etal-2022-evaluating</identifier>
<identifier type="doi">10.18653/v1/2022.insights-1.11</identifier>
<location>
<url>https://aclanthology.org/2022.insights-1.11</url>
</location>
<part>
<date>2022-05</date>
<extent unit="page">
<start>82</start>
<end>87</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Evaluating Biomedical Word Embeddings for Vocabulary Alignment at Scale in the UMLS Metathesaurus Using Siamese Networks
%A Bajaj, Goonmeet
%A Nguyen, Vinh
%A Wijesiriwardene, Thilini
%A Yip, Hong Yung
%A Javangula, Vishesh
%A Sheth, Amit
%A Parthasarathy, Srinivasan
%A Bodenreider, Olivier
%Y Tafreshi, Shabnam
%Y Sedoc, João
%Y Rogers, Anna
%Y Drozd, Aleksandr
%Y Rumshisky, Anna
%Y Akula, Arjun
%S Proceedings of the Third Workshop on Insights from Negative Results in NLP
%D 2022
%8 May
%I Association for Computational Linguistics
%C Dublin, Ireland
%F bajaj-etal-2022-evaluating
%X Recent work uses a Siamese Network, initialized with BioWordVec embeddings (distributed word embeddings), for predicting synonymy among biomedical terms to automate a part of the UMLS (Unified Medical Language System) Metathesaurus construction process. We evaluate the use of contextualized word embeddings extracted from nine different biomedical BERT-based models for synonym prediction in the UMLS by replacing BioWordVec embeddings with embeddings extracted from each biomedical BERT model using different feature extraction methods. Finally, we conduct a thorough grid search, which prior work lacks, to find the best set of hyperparameters. Surprisingly, we find that Siamese Networks initialized with BioWordVec embeddings still outperform the Siamese Networks initialized with embeddings extracted from biomedical BERT models.
%R 10.18653/v1/2022.insights-1.11
%U https://aclanthology.org/2022.insights-1.11
%U https://doi.org/10.18653/v1/2022.insights-1.11
%P 82-87
Markdown (Informal)
[Evaluating Biomedical Word Embeddings for Vocabulary Alignment at Scale in the UMLS Metathesaurus Using Siamese Networks](https://aclanthology.org/2022.insights-1.11) (Bajaj et al., insights 2022)
ACL
- Goonmeet Bajaj, Vinh Nguyen, Thilini Wijesiriwardene, Hong Yung Yip, Vishesh Javangula, Amit Sheth, Srinivasan Parthasarathy, and Olivier Bodenreider. 2022. Evaluating Biomedical Word Embeddings for Vocabulary Alignment at Scale in the UMLS Metathesaurus Using Siamese Networks. In Proceedings of the Third Workshop on Insights from Negative Results in NLP, pages 82–87, Dublin, Ireland. Association for Computational Linguistics.