@inproceedings{basaldella-collier-2019-bioreddit,
title = "{B}io{R}eddit: Word Embeddings for User-Generated Biomedical {NLP}",
author = "Basaldella, Marco and
Collier, Nigel",
editor = "Holderness, Eben and
Jimeno Yepes, Antonio and
Lavelli, Alberto and
Minard, Anne-Lyse and
Pustejovsky, James and
Rinaldi, Fabio",
booktitle = "Proceedings of the Tenth International Workshop on Health Text Mining and Information Analysis (LOUHI 2019)",
month = nov,
year = "2019",
address = "Hong Kong",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/D19-6205/",
doi = "10.18653/v1/D19-6205",
pages = "34--38",
abstract = "Word embeddings, in their different shapes and iterations, have changed the natural language processing research landscape in the last years. The biomedical text processing field is no stranger to this revolution; however, scholars in the field largely trained their embeddings on scientific documents only, even when working on user-generated data. In this paper we show how training embeddings from a corpus collected from user-generated text from medical forums heavily influences the performance on downstream tasks, outperforming embeddings trained both on general purpose data or on scientific papers when applied on user-generated content."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="basaldella-collier-2019-bioreddit">
<titleInfo>
<title>BioReddit: Word Embeddings for User-Generated Biomedical NLP</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marco</namePart>
<namePart type="family">Basaldella</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nigel</namePart>
<namePart type="family">Collier</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Tenth International Workshop on Health Text Mining and Information Analysis (LOUHI 2019)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Eben</namePart>
<namePart type="family">Holderness</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Antonio</namePart>
<namePart type="family">Jimeno Yepes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alberto</namePart>
<namePart type="family">Lavelli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anne-Lyse</namePart>
<namePart type="family">Minard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">James</namePart>
<namePart type="family">Pustejovsky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fabio</namePart>
<namePart type="family">Rinaldi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Hong Kong</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Word embeddings, in their different shapes and iterations, have changed the natural language processing research landscape in recent years. The biomedical text processing field is no stranger to this revolution; however, scholars in the field have largely trained their embeddings on scientific documents only, even when working on user-generated data. In this paper we show how training embeddings on a corpus of user-generated text collected from medical forums heavily influences performance on downstream tasks, with these embeddings outperforming those trained on either general-purpose data or scientific papers when applied to user-generated content.</abstract>
<identifier type="citekey">basaldella-collier-2019-bioreddit</identifier>
<identifier type="doi">10.18653/v1/D19-6205</identifier>
<location>
<url>https://aclanthology.org/D19-6205/</url>
</location>
<part>
<date>2019-11</date>
<extent unit="page">
<start>34</start>
<end>38</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T BioReddit: Word Embeddings for User-Generated Biomedical NLP
%A Basaldella, Marco
%A Collier, Nigel
%Y Holderness, Eben
%Y Jimeno Yepes, Antonio
%Y Lavelli, Alberto
%Y Minard, Anne-Lyse
%Y Pustejovsky, James
%Y Rinaldi, Fabio
%S Proceedings of the Tenth International Workshop on Health Text Mining and Information Analysis (LOUHI 2019)
%D 2019
%8 November
%I Association for Computational Linguistics
%C Hong Kong
%F basaldella-collier-2019-bioreddit
%X Word embeddings, in their different shapes and iterations, have changed the natural language processing research landscape in recent years. The biomedical text processing field is no stranger to this revolution; however, scholars in the field have largely trained their embeddings on scientific documents only, even when working on user-generated data. In this paper we show how training embeddings on a corpus of user-generated text collected from medical forums heavily influences performance on downstream tasks, with these embeddings outperforming those trained on either general-purpose data or scientific papers when applied to user-generated content.
%R 10.18653/v1/D19-6205
%U https://aclanthology.org/D19-6205/
%U https://doi.org/10.18653/v1/D19-6205
%P 34-38
Markdown (Informal)
[BioReddit: Word Embeddings for User-Generated Biomedical NLP](https://aclanthology.org/D19-6205/) (Basaldella & Collier, Louhi 2019)
ACL
Marco Basaldella and Nigel Collier. 2019. [BioReddit: Word Embeddings for User-Generated Biomedical NLP](https://aclanthology.org/D19-6205/). In *Proceedings of the Tenth International Workshop on Health Text Mining and Information Analysis (LOUHI 2019)*, pages 34–38, Hong Kong. Association for Computational Linguistics.
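
For context beyond the records above: the abstract describes training word embeddings on user-generated medical forum text and comparing them against embeddings trained on general-purpose or scientific text. The sketch below shows one plausible way to train such embeddings with gensim; the corpus file name, preprocessing, and hyperparameters are illustrative assumptions, not the configuration reported in the paper.

```python
# Hypothetical sketch: train skip-gram word embeddings on a corpus of
# medical-forum posts, in the spirit of the BioReddit abstract. Nothing
# here reproduces the authors' exact setup.
from gensim.models import Word2Vec
from gensim.utils import simple_preprocess

def read_posts(path):
    """Yield one tokenized post per line (assumed corpus layout)."""
    with open(path, encoding="utf-8") as f:
        for line in f:
            tokens = simple_preprocess(line)  # lowercase, strip punctuation
            if tokens:
                yield tokens

# "bioreddit_posts.txt" is an assumed file of forum posts, one per line.
sentences = list(read_posts("bioreddit_posts.txt"))

# Common skip-gram settings; vector size and window are generic defaults,
# not values taken from the paper.
model = Word2Vec(sentences, vector_size=200, window=5, min_count=5,
                 sg=1, workers=4)
model.wv.save("bioreddit.wordvectors")  # reusable KeyedVectors file
```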