@inproceedings{zhang-etal-2017-enhancing,
title = "Enhancing Automatic {ICD}-9-{CM} Code Assignment for Medical Texts with {P}ub{M}ed",
author = "Zhang, Danchen and
He, Daqing and
Zhao, Sanqiang and
Li, Lei",
editor = "Cohen, Kevin Bretonnel and
Demner-Fushman, Dina and
Ananiadou, Sophia and
Tsujii, Junichi",
booktitle = "{B}io{NLP} 2017",
month = aug,
year = "2017",
address = "Vancouver, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W17-2333",
doi = "10.18653/v1/W17-2333",
pages = "263--271",
abstract = "Assigning a standard ICD-9-CM code to disease symptoms in medical texts is an important task in the medical domain. Automating this process could greatly reduce the costs. However, the effectiveness of an automatic ICD-9-CM code classifier faces a serious problem, which can be triggered by unbalanced training data. Frequent diseases often have more training data, which helps its classification to perform better than that of an infrequent disease. However, a disease{'}s frequency does not necessarily reflect its importance. To resolve this training data shortage problem, we propose to strategically draw data from PubMed to enrich the training data when there is such need. We validate our method on the CMC dataset, and the evaluation results indicate that our method can significantly improve the code assignment classifiers{'} performance at the macro-averaging level.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhang-etal-2017-enhancing">
<titleInfo>
<title>Enhancing Automatic ICD-9-CM Code Assignment for Medical Texts with PubMed</title>
</titleInfo>
<name type="personal">
<namePart type="given">Danchen</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daqing</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sanqiang</namePart>
<namePart type="family">Zhao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lei</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>BioNLP 2017</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kevin</namePart>
<namePart type="given">Bretonnel</namePart>
<namePart type="family">Cohen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dina</namePart>
<namePart type="family">Demner-Fushman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sophia</namePart>
<namePart type="family">Ananiadou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junichi</namePart>
<namePart type="family">Tsujii</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vancouver, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Assigning a standard ICD-9-CM code to disease symptoms in medical texts is an important task in the medical domain. Automating this process could greatly reduce the costs. However, the effectiveness of an automatic ICD-9-CM code classifier faces a serious problem, which can be triggered by unbalanced training data. Frequent diseases often have more training data, which helps its classification to perform better than that of an infrequent disease. However, a disease’s frequency does not necessarily reflect its importance. To resolve this training data shortage problem, we propose to strategically draw data from PubMed to enrich the training data when there is such need. We validate our method on the CMC dataset, and the evaluation results indicate that our method can significantly improve the code assignment classifiers’ performance at the macro-averaging level.</abstract>
<identifier type="citekey">zhang-etal-2017-enhancing</identifier>
<identifier type="doi">10.18653/v1/W17-2333</identifier>
<location>
<url>https://aclanthology.org/W17-2333</url>
</location>
<part>
<date>2017-08</date>
<extent unit="page">
<start>263</start>
<end>271</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Enhancing Automatic ICD-9-CM Code Assignment for Medical Texts with PubMed
%A Zhang, Danchen
%A He, Daqing
%A Zhao, Sanqiang
%A Li, Lei
%Y Cohen, Kevin Bretonnel
%Y Demner-Fushman, Dina
%Y Ananiadou, Sophia
%Y Tsujii, Junichi
%S BioNLP 2017
%D 2017
%8 August
%I Association for Computational Linguistics
%C Vancouver, Canada
%F zhang-etal-2017-enhancing
%X Assigning a standard ICD-9-CM code to disease symptoms in medical texts is an important task in the medical domain. Automating this process could greatly reduce the costs. However, the effectiveness of an automatic ICD-9-CM code classifier faces a serious problem, which can be triggered by unbalanced training data. Frequent diseases often have more training data, which helps its classification to perform better than that of an infrequent disease. However, a disease’s frequency does not necessarily reflect its importance. To resolve this training data shortage problem, we propose to strategically draw data from PubMed to enrich the training data when there is such need. We validate our method on the CMC dataset, and the evaluation results indicate that our method can significantly improve the code assignment classifiers’ performance at the macro-averaging level.
%R 10.18653/v1/W17-2333
%U https://aclanthology.org/W17-2333
%U https://doi.org/10.18653/v1/W17-2333
%P 263-271
Markdown (Informal)
[Enhancing Automatic ICD-9-CM Code Assignment for Medical Texts with PubMed](https://aclanthology.org/W17-2333) (Zhang et al., BioNLP 2017)
ACL