@inproceedings{piskorski-jacquet-2020-tf,
title = "{TF}-{IDF} Character {N}-grams versus Word Embedding-based Models for Fine-grained Event Classification: A Preliminary Study",
author = "Piskorski, Jakub and
Jacquet, Guillaume",
editor = {H{\"u}rriyeto{\u{g}}lu, Ali and
Y{\"o}r{\"u}k, Erdem and
Zavarella, Vanni and
Tanev, Hristo},
booktitle = "Proceedings of the Workshop on Automated Extraction of Socio-political Events from News 2020",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association (ELRA)",
url = "https://aclanthology.org/2020.aespen-1.6",
pages = "26--34",
abstract = "Automating the detection of event mentions in online texts and their classification vis-a-vis domain-specific event type taxonomies has been acknowledged by many organisations worldwide to be of paramount importance in order to facilitate the process of intelligence gathering. This paper reports on some preliminary experiments of comparing various linguistically-lightweight approaches for fine-grained event classification based on short text snippets reporting on events. In particular, we compare the performance of a TF-IDF-weighted character n-gram SVM-based model versus SVMs trained on various off-the-shelf pre-trained word embeddings (GloVe, BERT, FastText) as features. We exploit a relatively large event corpus consisting of circa 610K short text event descriptions classified using 25 event categories that cover political violence and protest events. The best results, i.e., 83.5{\%} macro and 92.4{\%} micro F1 score, were obtained using the TF-IDF-weighted character n-gram model.",
language = "English",
ISBN = "979-10-95546-50-4",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="piskorski-jacquet-2020-tf">
<titleInfo>
<title>TF-IDF Character N-grams versus Word Embedding-based Models for Fine-grained Event Classification: A Preliminary Study</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jakub</namePart>
<namePart type="family">Piskorski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Guillaume</namePart>
<namePart type="family">Jacquet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Workshop on Automated Extraction of Socio-political Events from News 2020</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ali</namePart>
<namePart type="family">Hürriyetoğlu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Erdem</namePart>
<namePart type="family">Yörük</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vanni</namePart>
<namePart type="family">Zavarella</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hristo</namePart>
<namePart type="family">Tanev</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-50-4</identifier>
</relatedItem>
<abstract>Automating the detection of event mentions in online texts and their classification vis-a-vis domain-specific event type taxonomies has been acknowledged by many organisations worldwide to be of paramount importance in order to facilitate the process of intelligence gathering. This paper reports on some preliminary experiments of comparing various linguistically-lightweight approaches for fine-grained event classification based on short text snippets reporting on events. In particular, we compare the performance of a TF-IDF-weighted character n-gram SVM-based model versus SVMs trained on various off-the-shelf pre-trained word embeddings (GloVe, BERT, FastText) as features. We exploit a relatively large event corpus consisting of circa 610K short text event descriptions classified using 25 event categories that cover political violence and protest events. The best results, i.e., 83.5% macro and 92.4% micro F1 score, were obtained using the TF-IDF-weighted character n-gram model.</abstract>
<identifier type="citekey">piskorski-jacquet-2020-tf</identifier>
<location>
<url>https://aclanthology.org/2020.aespen-1.6</url>
</location>
<part>
<date>2020-05</date>
<extent unit="page">
<start>26</start>
<end>34</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T TF-IDF Character N-grams versus Word Embedding-based Models for Fine-grained Event Classification: A Preliminary Study
%A Piskorski, Jakub
%A Jacquet, Guillaume
%Y Hürriyetoğlu, Ali
%Y Yörük, Erdem
%Y Zavarella, Vanni
%Y Tanev, Hristo
%S Proceedings of the Workshop on Automated Extraction of Socio-political Events from News 2020
%D 2020
%8 May
%I European Language Resources Association (ELRA)
%C Marseille, France
%@ 979-10-95546-50-4
%G English
%F piskorski-jacquet-2020-tf
%X Automating the detection of event mentions in online texts and their classification vis-a-vis domain-specific event type taxonomies has been acknowledged by many organisations worldwide to be of paramount importance in order to facilitate the process of intelligence gathering. This paper reports on some preliminary experiments of comparing various linguistically-lightweight approaches for fine-grained event classification based on short text snippets reporting on events. In particular, we compare the performance of a TF-IDF-weighted character n-gram SVM-based model versus SVMs trained on various off-the-shelf pre-trained word embeddings (GloVe, BERT, FastText) as features. We exploit a relatively large event corpus consisting of circa 610K short text event descriptions classified using 25 event categories that cover political violence and protest events. The best results, i.e., 83.5% macro and 92.4% micro F1 score, were obtained using the TF-IDF-weighted character n-gram model.
%U https://aclanthology.org/2020.aespen-1.6
%P 26-34
Markdown (Informal)
[TF-IDF Character N-grams versus Word Embedding-based Models for Fine-grained Event Classification: A Preliminary Study](https://aclanthology.org/2020.aespen-1.6) (Piskorski & Jacquet, AESPEN 2020)
ACL