% Workshop paper (ClinicalNLP 2016); the url field points to ACL Anthology record W16-4204.
@InProceedings{lee-EtAl:2016:ClinicalNLP,
  author    = {Lee, Ji Young and Dernoncourt, Franck and Uzuner, Ozlem and Szolovits, Peter},
  title     = {Feature-Augmented Neural Networks for Patient Note De-identification},
  booktitle = {Proceedings of the Clinical Natural Language Processing Workshop ({ClinicalNLP})},
  month     = dec,
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {17--22},
  abstract  = {Patient notes contain a wealth of information of potentially great interest to
	medical investigators. However, to protect patients' privacy, Protected Health
	Information (PHI) must be removed from the patient notes before they can be
	legally released, a process known as patient note de-identification. The main
	objective for a de-identification system is to have the highest possible
	recall. Recently, the first neural-network-based de-identification system has
	been proposed, yielding state-of-the-art results. Unlike other systems, it does
	not rely on human-engineered features, which allows it to be quickly deployed,
	but does not leverage knowledge from human experts or from electronic health
	records (EHRs). In this work, we explore a method to incorporate
	human-engineered features as well as features derived from EHRs to a
	neural-network-based de-identification system. Our results show that the
	addition of features, especially the EHR-derived features, further improves the
	state-of-the-art in patient note de-identification, including for some of the
	most sensitive PHI types such as patient names. Since in a real-life setting
	patient notes typically come with EHRs, we recommend developers of
	de-identification systems to leverage the information EHRs contain.},
  url       = {http://aclweb.org/anthology/W16-4204}
}

