% Workshop paper from the BioTxtM2016 proceedings (COLING 2016 Organizing Committee, 2016).
% Acronyms are brace-protected so sentence-casing styles keep their capitals.
% NOTE(review): address appears to hold the conference location rather than
% the publisher's city; kept as given in the source record -- confirm.
@inproceedings{dandala-EtAl:2016:BioTxtM2016,
  author    = {Dandala, Bharath and Devarakonda, Murthy and Bornea, Mihaela and Nielson, Christopher},
  title     = {Scoring Disease-Medication Associations using Advanced {NLP}, Machine Learning, and Multiple Content Sources},
  booktitle = {Proceedings of the Fifth Workshop on Building and Evaluating Resources for Biomedical Text Mining ({BioTxtM2016})},
  month     = dec,
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The {COLING} 2016 Organizing Committee},
  pages     = {125--133},
  abstract  = {Effective knowledge resources are critical for developing successful clinical
               decision support systems that alleviate the cognitive load on physicians in
               patient care. In this paper, we describe two new methods for building a
               knowledge resource of disease to medication associations. These methods use
               fundamentally different content and are based on advanced natural language
               processing and machine learning techniques. One method uses distributional
               semantics on large medical text, and the other uses data mining on a large
               number of patient records. The methods are evaluated using 25,379 unique
               disease-medication pairs extracted from 100 de-identified longitudinal patient
               records of a large multi-provider hospital system. We measured recall (R),
               precision (P), and F scores for positive and negative association prediction,
               along with coverage and accuracy. While individual methods performed well, a
               combined stacked classifier achieved the best performance, indicating the
               limitations and unique value of each resource and method. In predicting
               positive associations, the stacked combination significantly outperformed the
               baseline (a distant semi-supervised method on large medical text), achieving F
               scores of 0.75 versus 0.55 on the pairs seen in the patient records, and F
               scores of 0.69 and 0.35 on unique pairs.},
  url       = {http://aclweb.org/anthology/W16-5114}
}

