% ACL Anthology BibTeX export for paper W18-5618 (workshop paper, pages 160--164).
@inproceedings{sheikhshabbafghi-etal-2018-domain,
  author    = {Sheikhshabbafghi, Golnar and Birol, Inanc and Sarkar, Anoop},
  title     = {In-domain Context-aware Token Embeddings Improve Biomedical Named Entity Recognition},
  editor    = {Lavelli, Alberto and Minard, Anne-Lyse and Rinaldi, Fabio},
  booktitle = {Proceedings of the Ninth International Workshop on Health Text Mining and Information Analysis},
  publisher = {Association for Computational Linguistics},
  address   = {Brussels, Belgium},
  month     = oct,
  year      = {2018},
  pages     = {160--164},
  doi       = {10.18653/v1/W18-5618},
  url       = {https://aclanthology.org/W18-5618/},
  abstract  = {Rapidly expanding volume of publications in the biomedical domain makes it increasingly difficult for a timely evaluation of the latest literature. That, along with a push for automated evaluation of clinical reports, present opportunities for effective natural language processing methods. In this study we target the problem of named entity recognition, where texts are processed to annotate terms that are relevant for biomedical studies. Terms of interest in the domain include gene and protein names, and cell lines and types. Here we report on a pipeline built on Embeddings from Language Models (ELMo) and a deep learning package for natural language processing (AllenNLP). We trained context-aware token embeddings on a dataset of biomedical papers using ELMo, and incorporated these embeddings in the LSTM-CRF model used by AllenNLP for named entity recognition. We show these representations improve named entity recognition for different types of biomedical named entities. We also achieve a new state of the art in gene mention detection on the BioCreative II gene mention shared task.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single bibliographic record. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="sheikhshabbafghi-etal-2018-domain">
    <!-- Paper title and its three authors, in byline order. -->
    <titleInfo>
      <title>In-domain Context-aware Token Embeddings Improve Biomedical Named Entity Recognition</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Golnar</namePart>
      <namePart type="family">Sheikhshabbafghi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Inanc</namePart>
      <namePart type="family">Birol</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Anoop</namePart>
      <namePart type="family">Sarkar</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2018-10</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Ninth International Workshop on Health Text Mining and Information Analysis</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Alberto</namePart>
        <namePart type="family">Lavelli</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Anne-Lyse</namePart>
        <namePart type="family">Minard</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Fabio</namePart>
        <namePart type="family">Rinaldi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Brussels, Belgium</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Rapidly expanding volume of publications in the biomedical domain makes it increasingly difficult for a timely evaluation of the latest literature. That, along with a push for automated evaluation of clinical reports, present opportunities for effective natural language processing methods. In this study we target the problem of named entity recognition, where texts are processed to annotate terms that are relevant for biomedical studies. Terms of interest in the domain include gene and protein names, and cell lines and types. Here we report on a pipeline built on Embeddings from Language Models (ELMo) and a deep learning package for natural language processing (AllenNLP). We trained context-aware token embeddings on a dataset of biomedical papers using ELMo, and incorporated these embeddings in the LSTM-CRF model used by AllenNLP for named entity recognition. We show these representations improve named entity recognition for different types of biomedical named entities. We also achieve a new state of the art in gene mention detection on the BioCreative II gene mention shared task.</abstract>
    <!-- Identifiers and access location for the paper. -->
    <identifier type="citekey">sheikhshabbafghi-etal-2018-domain</identifier>
    <identifier type="doi">10.18653/v1/W18-5618</identifier>
    <location>
      <url>https://aclanthology.org/W18-5618/</url>
    </location>
    <!-- Position of the paper inside the host volume (page range). -->
    <part>
      <date>2018-10</date>
      <extent unit="page">
        <start>160</start>
        <end>164</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T In-domain Context-aware Token Embeddings Improve Biomedical Named Entity Recognition
%A Sheikhshabbafghi, Golnar
%A Birol, Inanc
%A Sarkar, Anoop
%Y Lavelli, Alberto
%Y Minard, Anne-Lyse
%Y Rinaldi, Fabio
%S Proceedings of the Ninth International Workshop on Health Text Mining and Information Analysis
%D 2018
%8 October
%I Association for Computational Linguistics
%C Brussels, Belgium
%F sheikhshabbafghi-etal-2018-domain
%X Rapidly expanding volume of publications in the biomedical domain makes it increasingly difficult for a timely evaluation of the latest literature. That, along with a push for automated evaluation of clinical reports, present opportunities for effective natural language processing methods. In this study we target the problem of named entity recognition, where texts are processed to annotate terms that are relevant for biomedical studies. Terms of interest in the domain include gene and protein names, and cell lines and types. Here we report on a pipeline built on Embeddings from Language Models (ELMo) and a deep learning package for natural language processing (AllenNLP). We trained context-aware token embeddings on a dataset of biomedical papers using ELMo, and incorporated these embeddings in the LSTM-CRF model used by AllenNLP for named entity recognition. We show these representations improve named entity recognition for different types of biomedical named entities. We also achieve a new state of the art in gene mention detection on the BioCreative II gene mention shared task.
%R 10.18653/v1/W18-5618
%U https://aclanthology.org/W18-5618/
%U https://doi.org/10.18653/v1/W18-5618
%P 160-164
Markdown (Informal)
[In-domain Context-aware Token Embeddings Improve Biomedical Named Entity Recognition](https://aclanthology.org/W18-5618/) (Sheikhshabbafghi et al., Louhi 2018)
ACL