% Workshop paper from the ClinicalNLP 2016 proceedings (pp. 99--103).
% The month uses the standard unquoted macro so each style can format it.
@InProceedings{yuwono-ng-ngiam:2016:ClinicalNLP,
  author    = {Yuwono, Steven Kester and Ng, Hwee Tou and Ngiam, Kee Yuan},
  title     = {Automated Anonymization as Spelling Variant Detection},
  booktitle = {Proceedings of the Clinical Natural Language Processing Workshop ({ClinicalNLP})},
  month     = dec,
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {99--103},
  abstract  = {The issue of privacy has always been a concern when clinical texts are used for
               research purposes. Personal health information (PHI) (such as name and
               identification number) needs to be removed so that patients cannot be
               identified. Manual anonymization is not feasible due to the large number of
               clinical texts to be anonymized. In this paper, we tackle the task of
               anonymizing clinical texts written in sentence fragments and which frequently
               contain symbols, abbreviations, and misspelled words. Our clinical texts
               therefore differ from those in the i2b2 shared tasks which are in prose form
               with complete sentences. Our clinical texts are also part of a structured
               database which contains patient name and identification number in structured
               fields. As such, we formulate our anonymization task as spelling variant
               detection, exploiting patients' personal information in the structured fields
               to detect their spelling variants in clinical texts. We successfully anonymized
               clinical texts consisting of more than 200 million words, using minimum edit
               distance and regular expression patterns.},
  url       = {http://aclweb.org/anthology/W16-4214}
}

