@comment{ACL Anthology record W17-2311: paper in the BioNLP 2017 proceedings, pages 91--98.}
@inproceedings{cook-etal-2017-creation,
    title = "Creation and evaluation of a dictionary-based tagger for virus species and proteins",
    author = "Cook, Helen and
      B{\={e}}rzi{\c{n}}{\v{s}}, R{\={u}}dolfs and
      Rodr{\'\i}guez, Cristina Leal and
      Cejuela, Juan Miguel and
      Jensen, Lars Juhl",
    editor = "Cohen, Kevin Bretonnel and
      Demner-Fushman, Dina and
      Ananiadou, Sophia and
      Tsujii, Junichi",
    booktitle = "{BioNLP} 2017",
    month = aug,
    year = "2017",
    address = "Vancouver, Canada",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/W17-2311/",
    doi = "10.18653/v1/W17-2311",
    pages = "91--98",
    abstract = "Text mining automatically extracts information from the literature with the goal of making it available for further analysis, for example by incorporating it into biomedical databases. A key first step towards this goal is to identify and normalize the named entities, such as proteins and species, which are mentioned in text. Despite the large detrimental impact that viruses have on human and agricultural health, very little previous text-mining work has focused on identifying virus species and proteins in the literature. Here, we present an improved dictionary-based system for viral species and the first dictionary for viral proteins, which we benchmark on a new corpus of 300 manually annotated abstracts. We achieve 81.0{\%} precision and 72.7{\%} recall at the task of recognizing and normalizing viral species and 76.2{\%} precision and 34.9{\%} recall on viral proteins. These results are achieved despite the many challenges involved with the names of viral species and, especially, proteins. This work provides a foundation that can be used to extract more complicated relations about viruses from the literature."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="cook-etal-2017-creation">
<titleInfo>
<title>Creation and evaluation of a dictionary-based tagger for virus species and proteins</title>
</titleInfo>
<name type="personal">
<namePart type="given">Helen</namePart>
<namePart type="family">Cook</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rūdolfs</namePart>
<namePart type="family">Bērziņš</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cristina</namePart>
<namePart type="given">Leal</namePart>
<namePart type="family">Rodríguez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juan</namePart>
<namePart type="given">Miguel</namePart>
<namePart type="family">Cejuela</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lars</namePart>
<namePart type="given">Juhl</namePart>
<namePart type="family">Jensen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>BioNLP 2017</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kevin</namePart>
<namePart type="given">Bretonnel</namePart>
<namePart type="family">Cohen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dina</namePart>
<namePart type="family">Demner-Fushman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sophia</namePart>
<namePart type="family">Ananiadou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junichi</namePart>
<namePart type="family">Tsujii</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vancouver, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Text mining automatically extracts information from the literature with the goal of making it available for further analysis, for example by incorporating it into biomedical databases. A key first step towards this goal is to identify and normalize the named entities, such as proteins and species, which are mentioned in text. Despite the large detrimental impact that viruses have on human and agricultural health, very little previous text-mining work has focused on identifying virus species and proteins in the literature. Here, we present an improved dictionary-based system for viral species and the first dictionary for viral proteins, which we benchmark on a new corpus of 300 manually annotated abstracts. We achieve 81.0% precision and 72.7% recall at the task of recognizing and normalizing viral species and 76.2% precision and 34.9% recall on viral proteins. These results are achieved despite the many challenges involved with the names of viral species and, especially, proteins. This work provides a foundation that can be used to extract more complicated relations about viruses from the literature.</abstract>
<identifier type="citekey">cook-etal-2017-creation</identifier>
<identifier type="doi">10.18653/v1/W17-2311</identifier>
<location>
<url>https://aclanthology.org/W17-2311/</url>
</location>
<part>
<date>2017-08</date>
<extent unit="page">
<start>91</start>
<end>98</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Creation and evaluation of a dictionary-based tagger for virus species and proteins
%A Cook, Helen
%A Bērziņš, Rūdolfs
%A Rodríguez, Cristina Leal
%A Cejuela, Juan Miguel
%A Jensen, Lars Juhl
%Y Cohen, Kevin Bretonnel
%Y Demner-Fushman, Dina
%Y Ananiadou, Sophia
%Y Tsujii, Junichi
%S BioNLP 2017
%D 2017
%8 August
%I Association for Computational Linguistics
%C Vancouver, Canada
%F cook-etal-2017-creation
%X Text mining automatically extracts information from the literature with the goal of making it available for further analysis, for example by incorporating it into biomedical databases. A key first step towards this goal is to identify and normalize the named entities, such as proteins and species, which are mentioned in text. Despite the large detrimental impact that viruses have on human and agricultural health, very little previous text-mining work has focused on identifying virus species and proteins in the literature. Here, we present an improved dictionary-based system for viral species and the first dictionary for viral proteins, which we benchmark on a new corpus of 300 manually annotated abstracts. We achieve 81.0% precision and 72.7% recall at the task of recognizing and normalizing viral species and 76.2% precision and 34.9% recall on viral proteins. These results are achieved despite the many challenges involved with the names of viral species and, especially, proteins. This work provides a foundation that can be used to extract more complicated relations about viruses from the literature.
%R 10.18653/v1/W17-2311
%U https://aclanthology.org/W17-2311/
%U https://doi.org/10.18653/v1/W17-2311
%P 91-98
Markdown (Informal)
[Creation and evaluation of a dictionary-based tagger for virus species and proteins](https://aclanthology.org/W17-2311/) (Cook et al., BioNLP 2017)
ACL