@inproceedings{stoeckel-etal-2019-specialization,
title = "When Specialization Helps: Using Pooled Contextualized Embeddings to Detect Chemical and Biomedical Entities in {S}panish",
author = "Stoeckel, Manuel and
Hemati, Wahed and
Mehler, Alexander",
editor = "Kim, Jin-Dong and
N{\'e}dellec, Claire and
Bossy, Robert and
Del{\'e}ger, Louise",
booktitle = "Proceedings of the 5th Workshop on BioNLP Open Shared Tasks",
month = nov,
year = "2019",
address = "Hong Kong, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/D19-5702",
doi = "10.18653/v1/D19-5702",
pages = "11--15",
abstract = "The recognition of pharmacological substances, compounds and proteins is an essential preliminary work for the recognition of relations between chemicals and other biomedically relevant units. In this paper, we describe an approach to Task 1 of the PharmaCoNER Challenge, which involves the recognition of mentions of chemicals and drugs in Spanish medical texts. We train a state-of-the-art BiLSTM-CRF sequence tagger with stacked Pooled Contextualized Embeddings, word and sub-word embeddings using the open-source framework FLAIR. We present a new corpus composed of articles and papers from Spanish health science journals, termed the Spanish Health Corpus, and use it to train domain-specific embeddings which we incorporate in our model training. We achieve a result of 89.76{\%} F1-score using pre-trained embeddings and are able to improve these results to 90.52{\%} F1-score using specialized embeddings.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="stoeckel-etal-2019-specialization">
<titleInfo>
<title>When Specialization Helps: Using Pooled Contextualized Embeddings to Detect Chemical and Biomedical Entities in Spanish</title>
</titleInfo>
<name type="personal">
<namePart type="given">Manuel</namePart>
<namePart type="family">Stoeckel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wahed</namePart>
<namePart type="family">Hemati</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexander</namePart>
<namePart type="family">Mehler</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 5th Workshop on BioNLP Open Shared Tasks</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jin-Dong</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Claire</namePart>
<namePart type="family">Nédellec</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Robert</namePart>
<namePart type="family">Bossy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Louise</namePart>
<namePart type="family">Deléger</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Hong Kong, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The recognition of pharmacological substances, compounds and proteins is an essential preliminary work for the recognition of relations between chemicals and other biomedically relevant units. In this paper, we describe an approach to Task 1 of the PharmaCoNER Challenge, which involves the recognition of mentions of chemicals and drugs in Spanish medical texts. We train a state-of-the-art BiLSTM-CRF sequence tagger with stacked Pooled Contextualized Embeddings, word and sub-word embeddings using the open-source framework FLAIR. We present a new corpus composed of articles and papers from Spanish health science journals, termed the Spanish Health Corpus, and use it to train domain-specific embeddings which we incorporate in our model training. We achieve a result of 89.76% F1-score using pre-trained embeddings and are able to improve these results to 90.52% F1-score using specialized embeddings.</abstract>
<identifier type="citekey">stoeckel-etal-2019-specialization</identifier>
<identifier type="doi">10.18653/v1/D19-5702</identifier>
<location>
<url>https://aclanthology.org/D19-5702</url>
</location>
<part>
<date>2019-11</date>
<extent unit="page">
<start>11</start>
<end>15</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T When Specialization Helps: Using Pooled Contextualized Embeddings to Detect Chemical and Biomedical Entities in Spanish
%A Stoeckel, Manuel
%A Hemati, Wahed
%A Mehler, Alexander
%Y Kim, Jin-Dong
%Y Nédellec, Claire
%Y Bossy, Robert
%Y Deléger, Louise
%S Proceedings of the 5th Workshop on BioNLP Open Shared Tasks
%D 2019
%8 November
%I Association for Computational Linguistics
%C Hong Kong, China
%F stoeckel-etal-2019-specialization
%X The recognition of pharmacological substances, compounds and proteins is an essential preliminary work for the recognition of relations between chemicals and other biomedically relevant units. In this paper, we describe an approach to Task 1 of the PharmaCoNER Challenge, which involves the recognition of mentions of chemicals and drugs in Spanish medical texts. We train a state-of-the-art BiLSTM-CRF sequence tagger with stacked Pooled Contextualized Embeddings, word and sub-word embeddings using the open-source framework FLAIR. We present a new corpus composed of articles and papers from Spanish health science journals, termed the Spanish Health Corpus, and use it to train domain-specific embeddings which we incorporate in our model training. We achieve a result of 89.76% F1-score using pre-trained embeddings and are able to improve these results to 90.52% F1-score using specialized embeddings.
%R 10.18653/v1/D19-5702
%U https://aclanthology.org/D19-5702
%U https://doi.org/10.18653/v1/D19-5702
%P 11-15
Markdown (Informal)
[When Specialization Helps: Using Pooled Contextualized Embeddings to Detect Chemical and Biomedical Entities in Spanish](https://aclanthology.org/D19-5702) (Stoeckel et al., BioNLP 2019)
ACL