% Workshop paper on identifying personal-experience tweets about medication effects with RoBERTa.
% The model name in the title is braced as a whole word ({RoBERTa}) so that sentence-casing
% bibliography styles keep its capitalisation without splitting the word with mid-word braces.
@inproceedings{zhu-etal-2020-identifying,
    title = "Identifying Personal Experience Tweets of Medication Effects Using Pre-trained {RoBERTa} Language Model and Its Updating",
    author = "Zhu, Minghao and
      Song, Youzhe and
      Jin, Ge and
      Jiang, Keyuan",
    editor = "Holderness, Eben and
      Jimeno Yepes, Antonio and
      Lavelli, Alberto and
      Minard, Anne-Lyse and
      Pustejovsky, James and
      Rinaldi, Fabio",
    booktitle = "Proceedings of the 11th International Workshop on Health Text Mining and Information Analysis",
    month = nov,
    year = "2020",
    address = "Online",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2020.louhi-1.14",
    doi = "10.18653/v1/2020.louhi-1.14",
    pages = "127--137",
    abstract = "Post-market surveillance, the practice of monitoring the safe use of pharmaceutical drugs is an important part of pharmacovigilance. Being able to collect personal experience related to pharmaceutical product use could help us gain insight into how the human body reacts to different medications. Twitter, a popular social media service, is being considered as an important alternative data source for collecting personal experience information with medications. Identifying personal experience tweets is a challenging classification task in natural language processing. In this study, we utilized three methods based on Facebook{'}s Robustly Optimized BERT Pretraining Approach (RoBERTa) to predict personal experience tweets related to medication use: the first one combines the pre-trained RoBERTa model with a classifier, the second combines the updated pre-trained RoBERTa model using a corpus of unlabeled tweets with a classifier, and the third combines the RoBERTa model that was trained with our unlabeled tweets from scratch with the classifier too. Our results show that all of these approaches outperform the published methods (Word Embedding + LSTM) in classification performance (p {\textless} 0.05), and updating the pre-trained language model with tweets related to medications could even improve the performance further.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhu-etal-2020-identifying">
<titleInfo>
<title>Identifying Personal Experience Tweets of Medication Effects Using Pre-trained RoBERTa Language Model and Its Updating</title>
</titleInfo>
<name type="personal">
<namePart type="given">Minghao</namePart>
<namePart type="family">Zhu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Youzhe</namePart>
<namePart type="family">Song</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ge</namePart>
<namePart type="family">Jin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Keyuan</namePart>
<namePart type="family">Jiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 11th International Workshop on Health Text Mining and Information Analysis</title>
</titleInfo>
<name type="personal">
<namePart type="given">Eben</namePart>
<namePart type="family">Holderness</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Antonio</namePart>
<namePart type="family">Jimeno Yepes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alberto</namePart>
<namePart type="family">Lavelli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anne-Lyse</namePart>
<namePart type="family">Minard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">James</namePart>
<namePart type="family">Pustejovsky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fabio</namePart>
<namePart type="family">Rinaldi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Post-market surveillance, the practice of monitoring the safe use of pharmaceutical drugs is an important part of pharmacovigilance. Being able to collect personal experience related to pharmaceutical product use could help us gain insight into how the human body reacts to different medications. Twitter, a popular social media service, is being considered as an important alternative data source for collecting personal experience information with medications. Identifying personal experience tweets is a challenging classification task in natural language processing. In this study, we utilized three methods based on Facebook’s Robustly Optimized BERT Pretraining Approach (RoBERTa) to predict personal experience tweets related to medication use: the first one combines the pre-trained RoBERTa model with a classifier, the second combines the updated pre-trained RoBERTa model using a corpus of unlabeled tweets with a classifier, and the third combines the RoBERTa model that was trained with our unlabeled tweets from scratch with the classifier too. Our results show that all of these approaches outperform the published methods (Word Embedding + LSTM) in classification performance (p &lt; 0.05), and updating the pre-trained language model with tweets related to medications could even improve the performance further.</abstract>
<identifier type="citekey">zhu-etal-2020-identifying</identifier>
<identifier type="doi">10.18653/v1/2020.louhi-1.14</identifier>
<location>
<url>https://aclanthology.org/2020.louhi-1.14</url>
</location>
<part>
<date>2020-11</date>
<extent unit="page">
<start>127</start>
<end>137</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Identifying Personal Experience Tweets of Medication Effects Using Pre-trained RoBERTa Language Model and Its Updating
%A Zhu, Minghao
%A Song, Youzhe
%A Jin, Ge
%A Jiang, Keyuan
%Y Holderness, Eben
%Y Jimeno Yepes, Antonio
%Y Lavelli, Alberto
%Y Minard, Anne-Lyse
%Y Pustejovsky, James
%Y Rinaldi, Fabio
%S Proceedings of the 11th International Workshop on Health Text Mining and Information Analysis
%D 2020
%8 November
%I Association for Computational Linguistics
%C Online
%F zhu-etal-2020-identifying
%X Post-market surveillance, the practice of monitoring the safe use of pharmaceutical drugs is an important part of pharmacovigilance. Being able to collect personal experience related to pharmaceutical product use could help us gain insight into how the human body reacts to different medications. Twitter, a popular social media service, is being considered as an important alternative data source for collecting personal experience information with medications. Identifying personal experience tweets is a challenging classification task in natural language processing. In this study, we utilized three methods based on Facebook’s Robustly Optimized BERT Pretraining Approach (RoBERTa) to predict personal experience tweets related to medication use: the first one combines the pre-trained RoBERTa model with a classifier, the second combines the updated pre-trained RoBERTa model using a corpus of unlabeled tweets with a classifier, and the third combines the RoBERTa model that was trained with our unlabeled tweets from scratch with the classifier too. Our results show that all of these approaches outperform the published methods (Word Embedding + LSTM) in classification performance (p < 0.05), and updating the pre-trained language model with tweets related to medications could even improve the performance further.
%R 10.18653/v1/2020.louhi-1.14
%U https://aclanthology.org/2020.louhi-1.14
%U https://doi.org/10.18653/v1/2020.louhi-1.14
%P 127-137
Markdown (Informal)
[Identifying Personal Experience Tweets of Medication Effects Using Pre-trained RoBERTa Language Model and Its Updating](https://aclanthology.org/2020.louhi-1.14) (Zhu et al., Louhi 2020)
ACL