@inproceedings{kim-etal-2017-method,
title = "A Method to Generate a Machine-Labeled Data for Biomedical Named Entity Recognition with Various Sub-Domains",
author = "Kim, Juae and
Kwon, Sunjae and
Ko, Youngjoong and
Seo, Jungyun",
editor = "Jonnagaddala, Jitendra and
Dai, Hong-Jie and
Chang, Yung-Chun",
booktitle = "Proceedings of the International Workshop on Digital Disease Detection using Social Media 2017 ({DDDSM}-2017)",
month = nov,
year = "2017",
address = "Taipei, Taiwan",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W17-5807",
pages = "47--51",
abstract = "Biomedical Named Entity (NE) recognition is a core technique for various works in the biomedical domain. In previous studies, using machine learning algorithm shows better performance than dictionary-based and rule-based approaches because there are too many terminological variations of biomedical NEs and new biomedical NEs are constantly generated. To achieve the high performance with a machine-learning algorithm, good-quality corpora are required. However, it is difficult to obtain the good-quality corpora because an-notating a biomedical corpus for ma-chine-learning is extremely time-consuming and costly. In addition, most previous corpora are insufficient for high-level tasks because they cannot cover various domains. Therefore, we propose a method for generating a large amount of machine-labeled data that covers various domains. To generate a large amount of machine-labeled data, firstly we generate an initial machine-labeled data by using a chunker and MetaMap. The chunker is developed to extract only biomedical NEs with manually annotated data. MetaMap is used to annotate the category of bio-medical NE. Then we apply the self-training approach to bootstrap the performance of initial machine-labeled data. In our experiments, the biomedical NE recognition system that is trained with our proposed machine-labeled data achieves much high performance. As a result, our system outperforms biomedical NE recognition system that using MetaMap only with 26.03{\%}p improvements on F1-score.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kim-etal-2017-method">
<titleInfo>
<title>A Method to Generate a Machine-Labeled Data for Biomedical Named Entity Recognition with Various Sub-Domains</title>
</titleInfo>
<name type="personal">
<namePart type="given">Juae</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sunjae</namePart>
<namePart type="family">Kwon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Youngjoong</namePart>
<namePart type="family">Ko</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jungyun</namePart>
<namePart type="family">Seo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the International Workshop on Digital Disease Detection using Social Media 2017 (DDDSM-2017)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jitendra</namePart>
<namePart type="family">Jonnagaddala</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hong-Jie</namePart>
<namePart type="family">Dai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yung-Chun</namePart>
<namePart type="family">Chang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Taipei, Taiwan</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Biomedical Named Entity (NE) recognition is a core technique for various works in the biomedical domain. In previous studies, using machine learning algorithm shows better performance than dictionary-based and rule-based approaches because there are too many terminological variations of biomedical NEs and new biomedical NEs are constantly generated. To achieve the high performance with a machine-learning algorithm, good-quality corpora are required. However, it is difficult to obtain the good-quality corpora because an-notating a biomedical corpus for ma-chine-learning is extremely time-consuming and costly. In addition, most previous corpora are insufficient for high-level tasks because they cannot cover various domains. Therefore, we propose a method for generating a large amount of machine-labeled data that covers various domains. To generate a large amount of machine-labeled data, firstly we generate an initial machine-labeled data by using a chunker and MetaMap. The chunker is developed to extract only biomedical NEs with manually annotated data. MetaMap is used to annotate the category of bio-medical NE. Then we apply the self-training approach to bootstrap the performance of initial machine-labeled data. In our experiments, the biomedical NE recognition system that is trained with our proposed machine-labeled data achieves much high performance. As a result, our system outperforms biomedical NE recognition system that using MetaMap only with 26.03%p improvements on F1-score.</abstract>
<identifier type="citekey">kim-etal-2017-method</identifier>
<location>
<url>https://aclanthology.org/W17-5807</url>
</location>
<part>
<date>2017-11</date>
<extent unit="page">
<start>47</start>
<end>51</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Method to Generate a Machine-Labeled Data for Biomedical Named Entity Recognition with Various Sub-Domains
%A Kim, Juae
%A Kwon, Sunjae
%A Ko, Youngjoong
%A Seo, Jungyun
%Y Jonnagaddala, Jitendra
%Y Dai, Hong-Jie
%Y Chang, Yung-Chun
%S Proceedings of the International Workshop on Digital Disease Detection using Social Media 2017 (DDDSM-2017)
%D 2017
%8 November
%I Association for Computational Linguistics
%C Taipei, Taiwan
%F kim-etal-2017-method
%X Biomedical Named Entity (NE) recognition is a core technique for various works in the biomedical domain. In previous studies, using machine learning algorithm shows better performance than dictionary-based and rule-based approaches because there are too many terminological variations of biomedical NEs and new biomedical NEs are constantly generated. To achieve the high performance with a machine-learning algorithm, good-quality corpora are required. However, it is difficult to obtain the good-quality corpora because an-notating a biomedical corpus for ma-chine-learning is extremely time-consuming and costly. In addition, most previous corpora are insufficient for high-level tasks because they cannot cover various domains. Therefore, we propose a method for generating a large amount of machine-labeled data that covers various domains. To generate a large amount of machine-labeled data, firstly we generate an initial machine-labeled data by using a chunker and MetaMap. The chunker is developed to extract only biomedical NEs with manually annotated data. MetaMap is used to annotate the category of bio-medical NE. Then we apply the self-training approach to bootstrap the performance of initial machine-labeled data. In our experiments, the biomedical NE recognition system that is trained with our proposed machine-labeled data achieves much high performance. As a result, our system outperforms biomedical NE recognition system that using MetaMap only with 26.03%p improvements on F1-score.
%U https://aclanthology.org/W17-5807
%P 47-51
Markdown (Informal)
[A Method to Generate a Machine-Labeled Data for Biomedical Named Entity Recognition with Various Sub-Domains](https://aclanthology.org/W17-5807) (Kim et al., 2017)
ACL