@inproceedings{patel-etal-2017-adapting,
title = "Adapting Pre-trained Word Embeddings For Use In Medical Coding",
author = "Patel, Kevin and
Patel, Divya and
Golakiya, Mansi and
Bhattacharyya, Pushpak and
Birari, Nilesh",
editor = "Cohen, Kevin Bretonnel and
Demner-Fushman, Dina and
Ananiadou, Sophia and
Tsujii, Junichi",
booktitle = "{B}io{NLP} 2017",
month = aug,
year = "2017",
address = "Vancouver, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W17-2338",
doi = "10.18653/v1/W17-2338",
pages = "302--306",
abstract = "Word embeddings are a crucial component in modern NLP. Pre-trained embeddings released by different groups have been a major reason for their popularity. However, they are trained on generic corpora, which limits their direct use for domain specific tasks. In this paper, we propose a method to add task specific information to pre-trained word embeddings. Such information can improve their utility. We add information from medical coding data, as well as the first level from the hierarchy of ICD-10 medical code set to different pre-trained word embeddings. We adapt CBOW algorithm from the word2vec package for our purpose. We evaluated our approach on five different pre-trained word embeddings. Both the original word embeddings, and their modified versions (the ones with added information) were used for automated review of medical coding. The modified word embeddings give an improvement in f-score by 1{\%} on the 5-fold evaluation on a private medical claims dataset. Our results show that adding extra information is possible and beneficial for the task at hand.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="patel-etal-2017-adapting">
<titleInfo>
<title>Adapting Pre-trained Word Embeddings For Use In Medical Coding</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kevin</namePart>
<namePart type="family">Patel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Divya</namePart>
<namePart type="family">Patel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mansi</namePart>
<namePart type="family">Golakiya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pushpak</namePart>
<namePart type="family">Bhattacharyya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nilesh</namePart>
<namePart type="family">Birari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>BioNLP 2017</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kevin</namePart>
<namePart type="given">Bretonnel</namePart>
<namePart type="family">Cohen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dina</namePart>
<namePart type="family">Demner-Fushman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sophia</namePart>
<namePart type="family">Ananiadou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junichi</namePart>
<namePart type="family">Tsujii</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vancouver, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Word embeddings are a crucial component in modern NLP. Pre-trained embeddings released by different groups have been a major reason for their popularity. However, they are trained on generic corpora, which limits their direct use for domain specific tasks. In this paper, we propose a method to add task specific information to pre-trained word embeddings. Such information can improve their utility. We add information from medical coding data, as well as the first level from the hierarchy of ICD-10 medical code set to different pre-trained word embeddings. We adapt CBOW algorithm from the word2vec package for our purpose. We evaluated our approach on five different pre-trained word embeddings. Both the original word embeddings, and their modified versions (the ones with added information) were used for automated review of medical coding. The modified word embeddings give an improvement in f-score by 1% on the 5-fold evaluation on a private medical claims dataset. Our results show that adding extra information is possible and beneficial for the task at hand.</abstract>
<identifier type="citekey">patel-etal-2017-adapting</identifier>
<identifier type="doi">10.18653/v1/W17-2338</identifier>
<location>
<url>https://aclanthology.org/W17-2338</url>
</location>
<part>
<date>2017-08</date>
<extent unit="page">
<start>302</start>
<end>306</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Adapting Pre-trained Word Embeddings For Use In Medical Coding
%A Patel, Kevin
%A Patel, Divya
%A Golakiya, Mansi
%A Bhattacharyya, Pushpak
%A Birari, Nilesh
%Y Cohen, Kevin Bretonnel
%Y Demner-Fushman, Dina
%Y Ananiadou, Sophia
%Y Tsujii, Junichi
%S BioNLP 2017
%D 2017
%8 August
%I Association for Computational Linguistics
%C Vancouver, Canada
%F patel-etal-2017-adapting
%X Word embeddings are a crucial component in modern NLP. Pre-trained embeddings released by different groups have been a major reason for their popularity. However, they are trained on generic corpora, which limits their direct use for domain specific tasks. In this paper, we propose a method to add task specific information to pre-trained word embeddings. Such information can improve their utility. We add information from medical coding data, as well as the first level from the hierarchy of ICD-10 medical code set to different pre-trained word embeddings. We adapt CBOW algorithm from the word2vec package for our purpose. We evaluated our approach on five different pre-trained word embeddings. Both the original word embeddings, and their modified versions (the ones with added information) were used for automated review of medical coding. The modified word embeddings give an improvement in f-score by 1% on the 5-fold evaluation on a private medical claims dataset. Our results show that adding extra information is possible and beneficial for the task at hand.
%R 10.18653/v1/W17-2338
%U https://aclanthology.org/W17-2338
%U https://doi.org/10.18653/v1/W17-2338
%P 302-306
Markdown (Informal)
[Adapting Pre-trained Word Embeddings For Use In Medical Coding](https://aclanthology.org/W17-2338) (Patel et al., BioNLP 2017)
ACL