@inproceedings{hoelscher-obermaier-etal-2022-leveraging,
title = "Leveraging knowledge graphs to update scientific word embeddings using latent semantic imputation",
author = "Hoelscher-Obermaier, Jason and
Stevinson, Edward and
Stauber, Valentin and
Zhelev, Ivaylo and
Botev, Viktor and
Wu, Ronin and
Minton, Jeremy",
editor = "Ghosal, Tirthankar and
Blanco-Cuaresma, Sergi and
Accomazzi, Alberto and
Patton, Robert M. and
Grezes, Felix and
Allen, Thomas",
booktitle = "Proceedings of the first Workshop on Information Extraction from Scientific Publications",
month = nov,
year = "2022",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.wiesp-1.6",
pages = "43--53",
abstract = "The most interesting words in scientific texts will often be novel or rare. This presents a challenge for scientific word embedding models to determine quality embedding vectors for useful terms that are infrequent or newly emerging. We demonstrate how Latent Semantic Imputation (LSI) can address this problem by imputing embeddings for domain-specific words from up-to-date knowledge graphs while otherwise preserving the original word embedding model. We use the MeSH knowledge graph to impute embedding vectors for biomedical terminology without retraining and evaluate the resulting embedding model on a domain-specific word-pair similarity task. We show that LSI can produce reliable embedding vectors for rare and out-of-vocabulary terms in the biomedical domain.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hoelscher-obermaier-etal-2022-leveraging">
<titleInfo>
<title>Leveraging knowledge graphs to update scientific word embeddings using latent semantic imputation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jason</namePart>
<namePart type="family">Hoelscher-Obermaier</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Edward</namePart>
<namePart type="family">Stevinson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Valentin</namePart>
<namePart type="family">Stauber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ivaylo</namePart>
<namePart type="family">Zhelev</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Viktor</namePart>
<namePart type="family">Botev</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ronin</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jeremy</namePart>
<namePart type="family">Minton</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the first Workshop on Information Extraction from Scientific Publications</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tirthankar</namePart>
<namePart type="family">Ghosal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sergi</namePart>
<namePart type="family">Blanco-Cuaresma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alberto</namePart>
<namePart type="family">Accomazzi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Robert</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Patton</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Felix</namePart>
<namePart type="family">Grezes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thomas</namePart>
<namePart type="family">Allen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The most interesting words in scientific texts will often be novel or rare. This presents a challenge for scientific word embedding models to determine quality embedding vectors for useful terms that are infrequent or newly emerging. We demonstrate how Latent Semantic Imputation (LSI) can address this problem by imputing embeddings for domain-specific words from up-to-date knowledge graphs while otherwise preserving the original word embedding model. We use the MeSH knowledge graph to impute embedding vectors for biomedical terminology without retraining and evaluate the resulting embedding model on a domain-specific word-pair similarity task. We show that LSI can produce reliable embedding vectors for rare and out-of-vocabulary terms in the biomedical domain.</abstract>
<identifier type="citekey">hoelscher-obermaier-etal-2022-leveraging</identifier>
<location>
<url>https://aclanthology.org/2022.wiesp-1.6</url>
</location>
<part>
<date>2022-11</date>
<extent unit="page">
<start>43</start>
<end>53</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Leveraging knowledge graphs to update scientific word embeddings using latent semantic imputation
%A Hoelscher-Obermaier, Jason
%A Stevinson, Edward
%A Stauber, Valentin
%A Zhelev, Ivaylo
%A Botev, Viktor
%A Wu, Ronin
%A Minton, Jeremy
%Y Ghosal, Tirthankar
%Y Blanco-Cuaresma, Sergi
%Y Accomazzi, Alberto
%Y Patton, Robert M.
%Y Grezes, Felix
%Y Allen, Thomas
%S Proceedings of the first Workshop on Information Extraction from Scientific Publications
%D 2022
%8 November
%I Association for Computational Linguistics
%C Online
%F hoelscher-obermaier-etal-2022-leveraging
%X The most interesting words in scientific texts will often be novel or rare. This presents a challenge for scientific word embedding models to determine quality embedding vectors for useful terms that are infrequent or newly emerging. We demonstrate how Latent Semantic Imputation (LSI) can address this problem by imputing embeddings for domain-specific words from up-to-date knowledge graphs while otherwise preserving the original word embedding model. We use the MeSH knowledge graph to impute embedding vectors for biomedical terminology without retraining and evaluate the resulting embedding model on a domain-specific word-pair similarity task. We show that LSI can produce reliable embedding vectors for rare and out-of-vocabulary terms in the biomedical domain.
%U https://aclanthology.org/2022.wiesp-1.6
%P 43-53
Markdown (Informal)
[Leveraging knowledge graphs to update scientific word embeddings using latent semantic imputation](https://aclanthology.org/2022.wiesp-1.6) (Hoelscher-Obermaier et al., WIESP 2022)
ACL