% BioNLP 2017 workshop paper. `month` uses the predefined BibTeX macro (unbraced)
% so that bibliography styles can abbreviate or localize it.
@InProceedings{li-wu-vijayshanker:2017:BioNLP17,
  author    = {Li, Gang and Wu, Cathy and Vijay-Shanker, K.},
  title     = {Noise Reduction Methods for Distantly Supervised Biomedical Relation Extraction},
  booktitle = {BioNLP 2017},
  month     = aug,
  year      = {2017},
  address   = {Vancouver, Canada},
  publisher = {Association for Computational Linguistics},
  pages     = {184--193},
  abstract  = {Distant supervision has been applied to automatically generate labeled data for
    biomedical relation extraction. Noise exists in both positively and
    negatively-labeled data and affects the performance of supervised machine
    learning methods. In this paper, we propose three novel heuristics based on the
    notion of proximity, trigger word and confidence of patterns to leverage
    lexical and syntactic information to reduce the level of noise in the distantly
    labeled data. Experiments on three different tasks, extraction of
    protein-protein-interaction, miRNA-gene regulation relation and
    protein-localization event, show that the proposed methods can improve the
    F-score over the baseline by 6, 10 and 14 points for the three tasks,
    respectively. We also show that when the models are configured to output
    high-confidence results, high precisions can be obtained using the proposed
    methods, making them promising for facilitating manual curation for databases.},
  url       = {http://www.aclweb.org/anthology/W17-2323}
}

