% Workshop paper by Maximilian Zubke, Biomedical NLP Workshop associated with RANLP 2017, pp. 24--31.
@inproceedings{zubke-2017-classification,
  title     = {Classification based extraction of numeric values from clinical narratives},
  author    = {Zubke, Maximilian},
  editor    = {Boytcheva, Svetla and
               Cohen, Kevin Bretonnel and
               Savova, Guergana and
               Angelova, Galia},
  booktitle = {Proceedings of the Biomedical {NLP} Workshop associated with {RANLP} 2017},
  month     = sep,
  year      = {2017},
  address   = {Varna, Bulgaria},
  publisher = {INCOMA Ltd.},
  url       = {https://doi.org/10.26615/978-954-452-044-1_004},
  doi       = {10.26615/978-954-452-044-1_004},
  pages     = {24--31},
  abstract  = {The robust extraction of numeric values from clinical narratives is a well known problem in clinical data warehouses. In this paper we describe a dynamic and domain-independent approach to deliver numerical described values from clinical narratives. In contrast to alternative systems, we neither use manual defined rules nor any kind of ontologies or nomenclatures. Instead we propose a topic-based system, that tackles the information extraction as a text classification problem. Hence we use machine learning to identify the crucial context features of a topic-specific numeric value by a given set of example sentences, so that the manual effort reduces to the selection of appropriate sample sentences. We describe context features of a certain numeric value by term frequency vectors which are generated by multiple document segmentation procedures. Due to this simultaneous segmentation approaches, there can be more than one context vector for a numeric value. In those cases, we choose the context vector with the highest classification confidence and suppress the rest. To test our approach, we used a dataset from a german hospital containing 12,743 narrative reports about laboratory results of Leukemia patients. We used Support Vector Machines (SVM) for classification and achieved an average accuracy of 96{\%} on a manually labeled subset of 2073 documents, using 10-fold cross validation. This is a significant improvement over an alternative rule based system.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record; its ID matches the citekey identifier below. -->
  <mods ID="zubke-2017-classification">
    <titleInfo>
      <title>Classification based extraction of numeric values from clinical narratives</title>
    </titleInfo>
    <!-- Author of the paper. -->
    <name type="personal">
      <namePart type="given">Maximilian</namePart>
      <namePart type="family">Zubke</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2017-09</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its four editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Biomedical NLP Workshop associated with RANLP 2017</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Svetla</namePart>
        <namePart type="family">Boytcheva</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Kevin</namePart>
        <namePart type="given">Bretonnel</namePart>
        <namePart type="family">Cohen</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Guergana</namePart>
        <namePart type="family">Savova</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Galia</namePart>
        <namePart type="family">Angelova</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>INCOMA Ltd.</publisher>
        <place>
          <placeTerm type="text">Varna, Bulgaria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>The robust extraction of numeric values from clinical narratives is a well known problem in clinical data warehouses. In this paper we describe a dynamic and domain-independent approach to deliver numerical described values from clinical narratives. In contrast to alternative systems, we neither use manual defined rules nor any kind of ontologies or nomenclatures. Instead we propose a topic-based system, that tackles the information extraction as a text classification problem. Hence we use machine learning to identify the crucial context features of a topic-specific numeric value by a given set of example sentences, so that the manual effort reduces to the selection of appropriate sample sentences. We describe context features of a certain numeric value by term frequency vectors which are generated by multiple document segmentation procedures. Due to this simultaneous segmentation approaches, there can be more than one context vector for a numeric value. In those cases, we choose the context vector with the highest classification confidence and suppress the rest. To test our approach, we used a dataset from a german hospital containing 12,743 narrative reports about laboratory results of Leukemia patients. We used Support Vector Machines (SVM) for classification and achieved an average accuracy of 96% on a manually labeled subset of 2073 documents, using 10-fold cross validation. This is a significant improvement over an alternative rule based system.</abstract>
    <identifier type="citekey">zubke-2017-classification</identifier>
    <identifier type="doi">10.26615/978-954-452-044-1_004</identifier>
    <!-- Page extent of the paper within the host volume. -->
    <part>
      <date>2017-09</date>
      <extent unit="page">
        <start>24</start>
        <end>31</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Classification based extraction of numeric values from clinical narratives
%A Zubke, Maximilian
%Y Boytcheva, Svetla
%Y Cohen, Kevin Bretonnel
%Y Savova, Guergana
%Y Angelova, Galia
%S Proceedings of the Biomedical NLP Workshop associated with RANLP 2017
%D 2017
%8 September
%I INCOMA Ltd.
%C Varna, Bulgaria
%F zubke-2017-classification
%X The robust extraction of numeric values from clinical narratives is a well known problem in clinical data warehouses. In this paper we describe a dynamic and domain-independent approach to deliver numerical described values from clinical narratives. In contrast to alternative systems, we neither use manual defined rules nor any kind of ontologies or nomenclatures. Instead we propose a topic-based system, that tackles the information extraction as a text classification problem. Hence we use machine learning to identify the crucial context features of a topic-specific numeric value by a given set of example sentences, so that the manual effort reduces to the selection of appropriate sample sentences. We describe context features of a certain numeric value by term frequency vectors which are generated by multiple document segmentation procedures. Due to this simultaneous segmentation approaches, there can be more than one context vector for a numeric value. In those cases, we choose the context vector with the highest classification confidence and suppress the rest. To test our approach, we used a dataset from a german hospital containing 12,743 narrative reports about laboratory results of Leukemia patients. We used Support Vector Machines (SVM) for classification and achieved an average accuracy of 96% on a manually labeled subset of 2073 documents, using 10-fold cross validation. This is a significant improvement over an alternative rule based system.
%R 10.26615/978-954-452-044-1_004
%U https://doi.org/10.26615/978-954-452-044-1_004
%P 24-31
Markdown (Informal)
[Classification based extraction of numeric values from clinical narratives](https://doi.org/10.26615/978-954-452-044-1_004) (Zubke, RANLP 2017)
ACL