@InProceedings{patel-EtAl:2017:BioNLP17,
  author    = {Patel, Kevin  and  Patel, Divya  and  Golakiya, Mansi  and  Bhattacharyya, Pushpak  and  Birari, Nilesh},
  title     = {Adapting Pre-trained Word Embeddings For Use In Medical Coding},
  booktitle = {BioNLP 2017},
  month     = aug,
  year      = {2017},
  address   = {Vancouver, Canada},
  publisher = {Association for Computational Linguistics},
  pages     = {302--306},
  abstract  = {Word embeddings are a crucial component in modern NLP. Pre-trained embeddings
	released by different groups have been a major reason for their popularity.
	However, they are trained on generic corpora, which limits their direct use for
	domain specific tasks. In this paper, we propose a method to add task specific
	information to pre-trained word embeddings. Such information can improve their
	utility. We add information from medical coding data, as well as the first
	level from the hierarchy of ICD-10 medical code set to different pre-trained
	word embeddings. We adapt CBOW algorithm from the word2vec package for our
	purpose. We evaluated our approach on five different pre-trained word
	embeddings. Both the original word embeddings, and their modified versions (the
	ones with added information) were used for automated review of medical coding.
	The modified word embeddings give an improvement in f-score by 1\% on the
	5-fold evaluation on a private medical claims dataset. Our results show that
	adding extra information is possible and beneficial for the task at hand.},
  url       = {http://www.aclweb.org/anthology/W17-2338}
}

