@InProceedings{cook-EtAl:2017:BioNLP17,
  author    = {Cook, Helen and Berzins, Rudolfs and Rodr{\'\i}guez, Cristina Leal and Cejuela, Juan Miguel and Jensen, Lars Juhl},
  title     = {Creation and evaluation of a dictionary-based tagger for virus species and proteins},
  booktitle = {BioNLP 2017},
  month     = aug,
  year      = {2017},
  address   = {Vancouver, Canada},
  publisher = {Association for Computational Linguistics},
  pages     = {91--98},
  abstract  = {Text mining automatically extracts information from the literature with the goal
	of making it available for further analysis, for example by incorporating it
	into biomedical databases.  A key first step towards this goal is to identify
	and normalize the named entities, such as proteins and species, which are
	mentioned in text.  Despite the large detrimental impact that viruses have on
	human and agricultural health, very little previous text-mining work has
	focused on identifying virus species and proteins in the literature.  Here, we
	present an improved dictionary-based system for viral species and the first
	dictionary for viral proteins, which we benchmark on a new corpus of 300
	manually annotated abstracts.  We achieve 81.0\% precision and 72.7\% recall at
	the task of recognizing and normalizing viral species and 76.2\% precision and
	34.9\% recall on viral proteins.  These results are achieved despite the many
	challenges involved with the names of viral species and, especially, proteins.
	This work provides a foundation that can be used to extract more complicated
	relations about viruses from the literature.},
  url       = {http://www.aclweb.org/anthology/W17-2311}
}

