@inproceedings{jimenez-gutierrez-etal-2020-document-classification,
title = "Document Classification for {COVID-19} Literature",
author = "Jim{\'e}nez Guti{\'e}rrez, Bernal and
Zeng, Juncheng and
Zhang, Dongdong and
Zhang, Ping and
Su, Yu",
editor = "Verspoor, Karin and
Cohen, Kevin Bretonnel and
Dredze, Mark and
Ferrara, Emilio and
May, Jonathan and
Munro, Robert and
Paris, Cecile and
Wallace, Byron",
booktitle = "Proceedings of the 1st Workshop on {NLP} for {COVID-19} at {ACL} 2020",
month = jul,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.nlpcovid19-acl.3",
abstract = "The global pandemic has made it more important than ever to quickly and accurately retrieve relevant scientific literature for effective consumption by researchers in a wide range of fields. We provide an analysis of several multi-label document classification models on the LitCovid dataset. We find that pre-trained language models outperform other models in both low and high data regimes, achieving a maximum F1 score of around 86{\%}. We note that even the highest performing models still struggle with label correlation, distraction from introductory text and CORD-19 generalization. Both data and code are available on GitHub.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="jimenez-gutierrez-etal-2020-document-classification">
<titleInfo>
<title>Document Classification for COVID-19 Literature</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bernal</namePart>
<namePart type="family">Jiménez Gutiérrez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juncheng</namePart>
<namePart type="family">Zeng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dongdong</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ping</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yu</namePart>
<namePart type="family">Su</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st Workshop on NLP for COVID-19 at ACL 2020</title>
</titleInfo>
<name type="personal">
<namePart type="given">Karin</namePart>
<namePart type="family">Verspoor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kevin</namePart>
<namePart type="given">Bretonnel</namePart>
<namePart type="family">Cohen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mark</namePart>
<namePart type="family">Dredze</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Emilio</namePart>
<namePart type="family">Ferrara</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jonathan</namePart>
<namePart type="family">May</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Robert</namePart>
<namePart type="family">Munro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cecile</namePart>
<namePart type="family">Paris</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Byron</namePart>
<namePart type="family">Wallace</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The global pandemic has made it more important than ever to quickly and accurately retrieve relevant scientific literature for effective consumption by researchers in a wide range of fields. We provide an analysis of several multi-label document classification models on the LitCovid dataset. We find that pre-trained language models outperform other models in both low and high data regimes, achieving a maximum F1 score of around 86%. We note that even the highest performing models still struggle with label correlation, distraction from introductory text and CORD-19 generalization. Both data and code are available on GitHub.</abstract>
<identifier type="citekey">jimenez-gutierrez-etal-2020-document-classification</identifier>
<location>
<url>https://aclanthology.org/2020.nlpcovid19-acl.3</url>
</location>
<part>
<date>2020-07</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Document Classification for COVID-19 Literature
%A Jiménez Gutiérrez, Bernal
%A Zeng, Juncheng
%A Zhang, Dongdong
%A Zhang, Ping
%A Su, Yu
%Y Verspoor, Karin
%Y Cohen, Kevin Bretonnel
%Y Dredze, Mark
%Y Ferrara, Emilio
%Y May, Jonathan
%Y Munro, Robert
%Y Paris, Cecile
%Y Wallace, Byron
%S Proceedings of the 1st Workshop on NLP for COVID-19 at ACL 2020
%D 2020
%8 July
%I Association for Computational Linguistics
%C Online
%F jimenez-gutierrez-etal-2020-document-classification
%X The global pandemic has made it more important than ever to quickly and accurately retrieve relevant scientific literature for effective consumption by researchers in a wide range of fields. We provide an analysis of several multi-label document classification models on the LitCovid dataset. We find that pre-trained language models outperform other models in both low and high data regimes, achieving a maximum F1 score of around 86%. We note that even the highest performing models still struggle with label correlation, distraction from introductory text and CORD-19 generalization. Both data and code are available on GitHub.
%U https://aclanthology.org/2020.nlpcovid19-acl.3
Markdown (Informal)
[Document Classification for COVID-19 Literature](https://aclanthology.org/2020.nlpcovid19-acl.3) (Jiménez Gutiérrez et al., NLP-COVID19 2020)
ACL
- Bernal Jiménez Gutiérrez, Juncheng Zeng, Dongdong Zhang, Ping Zhang, and Yu Su. 2020. Document Classification for COVID-19 Literature. In Proceedings of the 1st Workshop on NLP for COVID-19 at ACL 2020, Online. Association for Computational Linguistics.