@inproceedings{piskorski-etal-2020-new,
    title = "New Benchmark Corpus and Models for Fine-grained Event Classification: To {BERT} or not to {BERT}?",
    author = "Piskorski, Jakub  and
      Haneczok, Jacek  and
      Jacquet, Guillaume",
    editor = "Scott, Donia  and
      Bel, Nuria  and
      Zong, Chengqing",
    booktitle = "Proceedings of the 28th International Conference on Computational Linguistics",
    month = dec,
    year = "2020",
    address = "Barcelona, Spain (Online)",
    publisher = "International Committee on Computational Linguistics",
    url = "https://aclanthology.org/2020.coling-main.584",
    doi = "10.18653/v1/2020.coling-main.584",
    pages = "6663--6678",
    abstract = "We introduce a new set of benchmark datasets derived from ACLED data for fine-grained event classification and compare the performance of various state-of-the-art models on these datasets, including SVM based on TF-IDF character n-grams and neural context-free embeddings (GLOVE and FASTTEXT) as well as deep learning-based BERT with its contextual embeddings. The best results in terms of micro (94.3-94.9{\%}) and macro F1 (86.0-88.9{\%}) were obtained using BERT transformer, with simpler TF-IDF character n-gram based SVM being an interesting alternative. Further, we discuss the pros and cons of the considered benchmark models in terms of their robustness and the dependence of the classification performance on the size of training data.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="piskorski-etal-2020-new">
    <titleInfo>
      <title>New Benchmark Corpus and Models for Fine-grained Event Classification: To BERT or not to BERT?</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Jakub</namePart>
      <namePart type="family">Piskorski</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jacek</namePart>
      <namePart type="family">Haneczok</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Guillaume</namePart>
      <namePart type="family">Jacquet</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2020-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 28th International Conference on Computational Linguistics</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Donia</namePart>
        <namePart type="family">Scott</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Nuria</namePart>
        <namePart type="family">Bel</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Chengqing</namePart>
        <namePart type="family">Zong</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>International Committee on Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Barcelona, Spain (Online)</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>We introduce a new set of benchmark datasets derived from ACLED data for fine-grained event classification and compare the performance of various state-of-the-art models on these datasets, including SVM based on TF-IDF character n-grams and neural context-free embeddings (GLOVE and FASTTEXT) as well as deep learning-based BERT with its contextual embeddings. The best results in terms of micro (94.3-94.9%) and macro F1 (86.0-88.9%) were obtained using BERT transformer, with simpler TF-IDF character n-gram based SVM being an interesting alternative. Further, we discuss the pros and cons of the considered benchmark models in terms of their robustness and the dependence of the classification performance on the size of training data.</abstract>
    <identifier type="citekey">piskorski-etal-2020-new</identifier>
    <identifier type="doi">10.18653/v1/2020.coling-main.584</identifier>
    <location>
      <url>https://aclanthology.org/2020.coling-main.584</url>
    </location>
    <part>
      <date>2020-12</date>
      <extent unit="page">
        <start>6663</start>
        <end>6678</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T New Benchmark Corpus and Models for Fine-grained Event Classification: To BERT or not to BERT?
%A Piskorski, Jakub
%A Haneczok, Jacek
%A Jacquet, Guillaume
%Y Scott, Donia
%Y Bel, Nuria
%Y Zong, Chengqing
%S Proceedings of the 28th International Conference on Computational Linguistics
%D 2020
%8 December
%I International Committee on Computational Linguistics
%C Barcelona, Spain (Online)
%F piskorski-etal-2020-new
%X We introduce a new set of benchmark datasets derived from ACLED data for fine-grained event classification and compare the performance of various state-of-the-art models on these datasets, including SVM based on TF-IDF character n-grams and neural context-free embeddings (GLOVE and FASTTEXT) as well as deep learning-based BERT with its contextual embeddings. The best results in terms of micro (94.3-94.9%) and macro F1 (86.0-88.9%) were obtained using BERT transformer, with simpler TF-IDF character n-gram based SVM being an interesting alternative. Further, we discuss the pros and cons of the considered benchmark models in terms of their robustness and the dependence of the classification performance on the size of training data.
%R 10.18653/v1/2020.coling-main.584
%U https://aclanthology.org/2020.coling-main.584
%U https://doi.org/10.18653/v1/2020.coling-main.584
%P 6663-6678
Markdown (Informal)
[New Benchmark Corpus and Models for Fine-grained Event Classification: To BERT or not to BERT?](https://aclanthology.org/2020.coling-main.584) (Piskorski et al., COLING 2020)
ACL
Jakub Piskorski, Jacek Haneczok, and Guillaume Jacquet. 2020. New Benchmark Corpus and Models for Fine-grained Event Classification: To BERT or not to BERT?. In Proceedings of the 28th International Conference on Computational Linguistics, pages 6663–6678, Barcelona, Spain (Online). International Committee on Computational Linguistics.