BibTeX
@inproceedings{el-boukkouri-etal-2019-embedding,
title = "Embedding Strategies for Specialized Domains: Application to Clinical Entity Recognition",
author = "El Boukkouri, Hicham and
Ferret, Olivier and
Lavergne, Thomas and
Zweigenbaum, Pierre",
editor = "Alva-Manchego, Fernando and
Choi, Eunsol and
Khashabi, Daniel",
booktitle = "Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics: Student Research Workshop",
month = jul,
year = "2019",
address = "Florence, Italy",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/P19-2041",
doi = "10.18653/v1/P19-2041",
pages = "295--301",
abstract = "Using pre-trained word embeddings in conjunction with Deep Learning models has become the {``}de facto{''} approach in Natural Language Processing (NLP). While this usually yields satisfactory results, off-the-shelf word embeddings tend to perform poorly on texts from specialized domains such as clinical reports. Moreover, training specialized word representations from scratch is often either impossible or ineffective due to the lack of large enough in-domain data. In this work, we focus on the clinical domain for which we study embedding strategies that rely on general-domain resources only. We show that by combining off-the-shelf contextual embeddings (ELMo) with static word2vec embeddings trained on a small in-domain corpus built from the task data, we manage to reach and sometimes outperform representations learned from a large corpus in the medical domain.",
}
MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="el-boukkouri-etal-2019-embedding">
<titleInfo>
<title>Embedding Strategies for Specialized Domains: Application to Clinical Entity Recognition</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hicham</namePart>
<namePart type="family">El Boukkouri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Olivier</namePart>
<namePart type="family">Ferret</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thomas</namePart>
<namePart type="family">Lavergne</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pierre</namePart>
<namePart type="family">Zweigenbaum</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics: Student Research Workshop</title>
</titleInfo>
<name type="personal">
<namePart type="given">Fernando</namePart>
<namePart type="family">Alva-Manchego</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eunsol</namePart>
<namePart type="family">Choi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Khashabi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Florence, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Using pre-trained word embeddings in conjunction with Deep Learning models has become the “de facto” approach in Natural Language Processing (NLP). While this usually yields satisfactory results, off-the-shelf word embeddings tend to perform poorly on texts from specialized domains such as clinical reports. Moreover, training specialized word representations from scratch is often either impossible or ineffective due to the lack of large enough in-domain data. In this work, we focus on the clinical domain for which we study embedding strategies that rely on general-domain resources only. We show that by combining off-the-shelf contextual embeddings (ELMo) with static word2vec embeddings trained on a small in-domain corpus built from the task data, we manage to reach and sometimes outperform representations learned from a large corpus in the medical domain.</abstract>
<identifier type="citekey">el-boukkouri-etal-2019-embedding</identifier>
<identifier type="doi">10.18653/v1/P19-2041</identifier>
<location>
<url>https://aclanthology.org/P19-2041</url>
</location>
<part>
<date>2019-07</date>
<extent unit="page">
<start>295</start>
<end>301</end>
</extent>
</part>
</mods>
</modsCollection>
Endnote
%0 Conference Proceedings
%T Embedding Strategies for Specialized Domains: Application to Clinical Entity Recognition
%A El Boukkouri, Hicham
%A Ferret, Olivier
%A Lavergne, Thomas
%A Zweigenbaum, Pierre
%Y Alva-Manchego, Fernando
%Y Choi, Eunsol
%Y Khashabi, Daniel
%S Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics: Student Research Workshop
%D 2019
%8 July
%I Association for Computational Linguistics
%C Florence, Italy
%F el-boukkouri-etal-2019-embedding
%X Using pre-trained word embeddings in conjunction with Deep Learning models has become the “de facto” approach in Natural Language Processing (NLP). While this usually yields satisfactory results, off-the-shelf word embeddings tend to perform poorly on texts from specialized domains such as clinical reports. Moreover, training specialized word representations from scratch is often either impossible or ineffective due to the lack of large enough in-domain data. In this work, we focus on the clinical domain for which we study embedding strategies that rely on general-domain resources only. We show that by combining off-the-shelf contextual embeddings (ELMo) with static word2vec embeddings trained on a small in-domain corpus built from the task data, we manage to reach and sometimes outperform representations learned from a large corpus in the medical domain.
%R 10.18653/v1/P19-2041
%U https://aclanthology.org/P19-2041
%U https://doi.org/10.18653/v1/P19-2041
%P 295-301
Markdown (Informal)
[Embedding Strategies for Specialized Domains: Application to Clinical Entity Recognition](https://aclanthology.org/P19-2041) (El Boukkouri et al., ACL 2019)
ACL
Hicham El Boukkouri, Olivier Ferret, Thomas Lavergne, and Pierre Zweigenbaum. 2019. Embedding Strategies for Specialized Domains: Application to Clinical Entity Recognition. In Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics: Student Research Workshop, pages 295–301, Florence, Italy. Association for Computational Linguistics.
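A minimal sketch of the strategy the abstract describes: train static word2vec vectors on a small in-domain corpus built from the task data, then combine them with off-the-shelf contextual ELMo embeddings before feeding a sequence tagger. This is an illustration, not the paper's implementation: it assumes gensim 4.x and the legacy `ElmoEmbedder` from allennlp 0.9, and the toy corpus, vector sizes, layer averaging, and per-token concatenation are assumptions of this sketch, since the abstract does not specify how the two representations are combined.

```python
# Illustrative sketch only; assumes gensim 4.x and allennlp 0.9 are installed.
import numpy as np
from gensim.models import Word2Vec
from allennlp.commands.elmo import ElmoEmbedder

# Small in-domain corpus built from the task data (toy clinical sentences here).
corpus = [
    ["patient", "denies", "chest", "pain"],
    ["chest", "x-ray", "shows", "no", "acute", "infiltrate"],
]

# Static in-domain embeddings trained from scratch on the small corpus.
w2v = Word2Vec(sentences=corpus, vector_size=100, window=5, min_count=1)

# Off-the-shelf contextual embeddings trained on general-domain text.
elmo = ElmoEmbedder()  # downloads the default pre-trained ELMo weights

def embed_tokens(tokens):
    """Per-token concatenation of contextual (ELMo) and static (word2vec) vectors."""
    # embed_sentence returns an array of shape (3 layers, n_tokens, 1024);
    # averaging the layers is one simple way to get one vector per token.
    contextual = elmo.embed_sentence(tokens).mean(axis=0)
    static = np.stack([
        w2v.wv[t] if t in w2v.wv else np.zeros(w2v.vector_size)
        for t in tokens
    ])
    return np.concatenate([contextual, static], axis=1)  # (n_tokens, 1124)

features = embed_tokens(["patient", "denies", "chest", "pain"])
print(features.shape)  # (4, 1124)
```

In the paper's setting, the combined per-token vectors would then serve as input features to a clinical NER model; only the embedding-combination step is sketched here.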