@inproceedings{limsopatham-collier:2016:BioTxtM2016,
  author    = {Limsopatham, Nut and Collier, Nigel},
  title     = {Learning Orthographic Features in Bi-directional {LSTM} for Biomedical Named Entity Recognition},
  booktitle = {Proceedings of the Fifth Workshop on Building and Evaluating Resources for Biomedical Text Mining ({BioTxtM2016})},
  month     = dec,
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The {COLING} 2016 Organizing Committee},
  pages     = {10--19},
  abstract  = {End-to-end neural network models for named entity recognition (NER) have shown
               to achieve effective performances on general domain datasets (e.g.\ newswire),
               without requiring additional hand-crafted features. However, in biomedical
               domain, recent studies have shown that hand-engineered features (e.g.\
               orthographic features) should be used to attain effective performance, due to
               the complexity of biomedical terminology (e.g.\ the use of acronyms and complex
               gene names). In this work, we propose a novel approach that allows a neural
               network model based on a long short-term memory (LSTM) to automatically learn
               orthographic features and incorporate them into a model for biomedical NER.
               Importantly, our bi-directional LSTM model learns and leverages orthographic
               features on an end-to-end basis. We evaluate our approach by comparing against
               existing neural network models for NER using three well-established biomedical
               datasets. Our experimental results show that the proposed approach consistently
               outperforms these strong baselines across all of the three datasets.},
  url       = {https://aclanthology.org/W16-5102},
}

