% Zhang et al., BioNLP 2017 paper (ACL Anthology id W17-2333).
% Acronyms in the title are brace-protected so sentence-casing styles keep them intact.
@InProceedings{zhang-EtAl:2017:BioNLP17,
  author    = {Zhang, Danchen and He, Daqing and Zhao, Sanqiang and Li, Lei},
  title     = {Enhancing Automatic {ICD-9-CM} Code Assignment for Medical Texts with {PubMed}},
  booktitle = {BioNLP 2017},
  month     = aug,
  year      = {2017},
  address   = {Vancouver, Canada},
  publisher = {Association for Computational Linguistics},
  pages     = {263--271},
  abstract  = {Assigning a standard ICD-9-CM code to disease symptoms in medical texts is an
    important task in the medical domain. Automating this process could greatly
    reduce the costs. However, the effectiveness of an automatic ICD-9-CM code
    classifier faces a serious problem, which can be triggered by unbalanced
    training data. Frequent diseases often have more training data, which helps its
    classification to perform better than that of an infrequent disease. However, a
    disease's frequency does not necessarily reflect its importance. To resolve
    this training data shortage problem, we propose to strategically draw data from
    PubMed to enrich the training data when there is such need. We validate our
    method on the CMC dataset, and the evaluation results indicate that our method
    can significantly improve the code assignment classifiers' performance at the
    macro-averaging level.},
  url       = {http://www.aclweb.org/anthology/W17-2333},
}

