% Zubke (2017), Biomedical NLP Workshop at RANLP 2017: classification-based
% (SVM) extraction of numeric values from clinical narratives.
% Notes for maintainers:
%   - month uses the predefined BibTeX macro (bare sep) so styles can
%     localise or abbreviate it.
%   - doi holds the bare identifier; url is retained for styles that do not
%     know the doi field.
%   - The abstract wording is kept verbatim; only its whitespace and line
%     wrapping were normalised.
@InProceedings{zubke:2017:BioNLP,
  author    = {Zubke, Maximilian},
  title     = {Classification based extraction of numeric values from clinical narratives},
  booktitle = {Proceedings of the Biomedical {NLP} Workshop associated with {RANLP} 2017},
  month     = sep,
  year      = {2017},
  address   = {Varna, Bulgaria},
  publisher = {INCOMA Ltd.},
  pages     = {24--31},
  abstract  = {The robust extraction of numeric values from clinical narratives is a well
    known problem in clinical data warehouses.
    In this paper we describe a dynamic and domain-independent approach to deliver
    numerical described values from clinical narratives. In contrast to alternative
    systems, we neither use manual defined rules nor any kind of ontologies or
    nomenclatures. Instead we propose a topic-based system, that tackles the
    information extraction as a text classification problem. Hence we use machine
    learning to identify the crucial context features of a topic-specific numeric
    value by a given set of example sentences, so that the manual effort reduces to
    the selection of appropriate sample sentences.
    We describe context features of a certain numeric value by term frequency
    vectors which are generated by multiple document segmentation procedures. Due
    to this simultaneous segmentation approaches, there can be more than one
    context vector for a numeric value. In those cases, we choose the context
    vector with the highest classification confidence and suppress the rest.
    To test our approach, we used a dataset from a german hospital containing
    12\,743 narrative reports about laboratory results of Leukemia patients. We
    used Support Vector Machines (SVM) for classification and achieved an average
    accuracy of 96\% on a manually labeled subset of 2073 documents, using 10-fold
    cross validation. This is a significant improvement over an alternative rule
    based system.},
  doi       = {10.26615/978-954-452-044-1_004},
  url       = {https://doi.org/10.26615/978-954-452-044-1_004}
}

