% DDDSM-2017 workshop paper; ACL Anthology identifier W17-5807.
% The abstract is kept verbatim apart from repaired line-break hyphenation and an escaped percent sign.
@InProceedings{kim-EtAl:2017:DDDSM,
  author    = {Kim, Juae and Kwon, Sunjae and Ko, Youngjoong and Seo, Jungyun},
  title     = {A Method to Generate a Machine-Labeled Data for Biomedical Named Entity Recognition with Various Sub-Domains},
  booktitle = {Proceedings of the International Workshop on Digital Disease Detection using Social Media 2017 (DDDSM-2017)},
  month     = nov,
  year      = {2017},
  address   = {Taipei, Taiwan},
  publisher = {Association for Computational Linguistics},
  pages     = {47--51},
  abstract  = {Biomedical Named Entity (NE) recognition is a core technique for various works
    in the biomedical domain. In previous studies, using machine learning algorithm
    shows better performance than dictionary-based and rule-based approaches
    because there are too many terminological variations of biomedical NEs and new
    biomedical NEs are constantly generated. To achieve the high performance with a
    machine-learning algorithm, good-quality corpora are required. However, it is
    difficult to obtain the good-quality corpora because annotating a biomedical
    corpus for machine-learning is extremely time-consuming and costly. In
    addition, most previous corpora are insufficient for high-level tasks because
    they cannot cover various domains. Therefore, we propose a method for
    generating a large amount of machine-labeled data that covers various domains.
    To generate a large amount of machine-labeled data, firstly we generate an
    initial machine-labeled data by using a chunker and MetaMap. The chunker is
    developed to extract only biomedical NEs with manually annotated data. MetaMap
    is used to annotate the category of biomedical NE. Then we apply the
    self-training approach to bootstrap the performance of initial machine-labeled
    data. In our experiments, the biomedical NE recognition system that is trained
    with our proposed machine-labeled data achieves much high performance. As a
    result, our system outperforms biomedical NE recognition system that using
    MetaMap only with 26.03\%p improvements on F1-score.},
  url       = {http://www.aclweb.org/anthology/W17-5807}
}

