@inproceedings{luo-etal-2021-ochadai,
title = "{OCHADAI} at {SMM}4{H}-2021 Task 5: Classifying self-reporting tweets on potential cases of {COVID}-19 by ensembling pre-trained language models",
author = "Luo, Ying and
Pereira, Lis and
Ichiro, Kobayashi",
booktitle = "Proceedings of the Sixth Social Media Mining for Health ({\#}SMM4H) Workshop and Shared Task",
month = jun,
year = "2021",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.smm4h-1.25",
doi = "10.18653/v1/2021.smm4h-1.25",
pages = "123--125",
abstract = {Since the outbreak of coronavirus at the end of 2019, there have been numerous studies on coro- navirus in the NLP arena. Meanwhile, Twitter has been a valuable source of news and a pub- lic medium for the conveyance of information and personal expression. This paper describes the system developed by the Ochadai team for the Social Media Mining for Health Appli- cations (SMM4H) 2021 Task 5, which aims to automatically distinguish English tweets that self-report potential cases of COVID-19 from those that do not. We proposed a model ensemble that leverages pre-trained represen- tations from COVID-Twitter-BERT (M{\"u}ller et al., 2020), RoBERTa (Liu et al., 2019), and Twitter-RoBERTa (Glazkova et al., 2021). Our model obtained F1-scores of 76{\%} on the test set in the evaluation phase, and 77.5{\%} in the post-evaluation phase.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="luo-etal-2021-ochadai">
<titleInfo>
<title>OCHADAI at SMM4H-2021 Task 5: Classifying self-reporting tweets on potential cases of COVID-19 by ensembling pre-trained language models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ying</namePart>
<namePart type="family">Luo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lis</namePart>
<namePart type="family">Pereira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kobayashi</namePart>
<namePart type="family">Ichiro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Sixth Social Media Mining for Health (#SMM4H) Workshop and Shared Task</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mexico City, Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Since the outbreak of coronavirus at the end of 2019, there have been numerous studies on coro- navirus in the NLP arena. Meanwhile, Twitter has been a valuable source of news and a pub- lic medium for the conveyance of information and personal expression. This paper describes the system developed by the Ochadai team for the Social Media Mining for Health Appli- cations (SMM4H) 2021 Task 5, which aims to automatically distinguish English tweets that self-report potential cases of COVID-19 from those that do not. We proposed a model ensemble that leverages pre-trained represen- tations from COVID-Twitter-BERT (Müller et al., 2020), RoBERTa (Liu et al., 2019), and Twitter-RoBERTa (Glazkova et al., 2021). Our model obtained F1-scores of 76% on the test set in the evaluation phase, and 77.5% in the post-evaluation phase.</abstract>
<identifier type="citekey">luo-etal-2021-ochadai</identifier>
<identifier type="doi">10.18653/v1/2021.smm4h-1.25</identifier>
<location>
<url>https://aclanthology.org/2021.smm4h-1.25</url>
</location>
<part>
<date>2021-06</date>
<extent unit="page">
<start>123</start>
<end>125</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T OCHADAI at SMM4H-2021 Task 5: Classifying self-reporting tweets on potential cases of COVID-19 by ensembling pre-trained language models
%A Luo, Ying
%A Pereira, Lis
%A Ichiro, Kobayashi
%S Proceedings of the Sixth Social Media Mining for Health (#SMM4H) Workshop and Shared Task
%D 2021
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F luo-etal-2021-ochadai
%X Since the outbreak of coronavirus at the end of 2019, there have been numerous studies on coro- navirus in the NLP arena. Meanwhile, Twitter has been a valuable source of news and a pub- lic medium for the conveyance of information and personal expression. This paper describes the system developed by the Ochadai team for the Social Media Mining for Health Appli- cations (SMM4H) 2021 Task 5, which aims to automatically distinguish English tweets that self-report potential cases of COVID-19 from those that do not. We proposed a model ensemble that leverages pre-trained represen- tations from COVID-Twitter-BERT (Müller et al., 2020), RoBERTa (Liu et al., 2019), and Twitter-RoBERTa (Glazkova et al., 2021). Our model obtained F1-scores of 76% on the test set in the evaluation phase, and 77.5% in the post-evaluation phase.
%R 10.18653/v1/2021.smm4h-1.25
%U https://aclanthology.org/2021.smm4h-1.25
%U https://doi.org/10.18653/v1/2021.smm4h-1.25
%P 123-125
Markdown (Informal)
[OCHADAI at SMM4H-2021 Task 5: Classifying self-reporting tweets on potential cases of COVID-19 by ensembling pre-trained language models](https://aclanthology.org/2021.smm4h-1.25) (Luo et al., SMM4H 2021)
ACL