@inproceedings{olsson-etal-2020-text,
title = "Text Categorization for Conflict Event Annotation",
author = "Olsson, Fredrik and
Sahlgren, Magnus and
ben Abdesslem, Fehmi and
Ekgren, Ariel and
Eck, Kristine",
editor = {H{\"u}rriyeto{\u{g}}lu, Ali and
Y{\"o}r{\"u}k, Erdem and
Zavarella, Vanni and
Tanev, Hristo},
booktitle = "Proceedings of the Workshop on Automated Extraction of Socio-political Events from News 2020",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association (ELRA)",
url = "https://aclanthology.org/2020.aespen-1.5",
pages = "19--25",
abstract = "We cast the problem of event annotation as one of text categorization, and compare state of the art text categorization techniques on event data produced within the Uppsala Conflict Data Program (UCDP). Annotating a single text involves assigning the labels pertaining to at least 17 distinct categorization tasks, e.g., who was the attacking organization, who was attacked, and where did the event take place. The text categorization techniques under scrutiny are a classical Bag-of-Words approach; character-based contextualized embeddings produced by ELMo; embeddings produced by the BERT base model, and a version of BERT base fine-tuned on UCDP data; and a pre-trained and fine-tuned classifier based on ULMFiT. The categorization tasks are very diverse in terms of the number of classes to predict as well as the skewness of the distribution of classes. The categorization results exhibit a large variability across tasks, ranging from 30.3{\%} to 99.8{\%} F-score.",
language = "English",
ISBN = "979-10-95546-50-4",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="olsson-etal-2020-text">
<titleInfo>
<title>Text Categorization for Conflict Event Annotation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Fredrik</namePart>
<namePart type="family">Olsson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Magnus</namePart>
<namePart type="family">Sahlgren</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fehmi</namePart>
<namePart type="family">ben Abdesslem</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ariel</namePart>
<namePart type="family">Ekgren</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kristine</namePart>
<namePart type="family">Eck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Workshop on Automated Extraction of Socio-political Events from News 2020</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ali</namePart>
<namePart type="family">Hürriyetoğlu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Erdem</namePart>
<namePart type="family">Yörük</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vanni</namePart>
<namePart type="family">Zavarella</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hristo</namePart>
<namePart type="family">Tanev</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-50-4</identifier>
</relatedItem>
<abstract>We cast the problem of event annotation as one of text categorization, and compare state of the art text categorization techniques on event data produced within the Uppsala Conflict Data Program (UCDP). Annotating a single text involves assigning the labels pertaining to at least 17 distinct categorization tasks, e.g., who was the attacking organization, who was attacked, and where did the event take place. The text categorization techniques under scrutiny are a classical Bag-of-Words approach; character-based contextualized embeddings produced by ELMo; embeddings produced by the BERT base model, and a version of BERT base fine-tuned on UCDP data; and a pre-trained and fine-tuned classifier based on ULMFiT. The categorization tasks are very diverse in terms of the number of classes to predict as well as the skewness of the distribution of classes. The categorization results exhibit a large variability across tasks, ranging from 30.3% to 99.8% F-score.</abstract>
<identifier type="citekey">olsson-etal-2020-text</identifier>
<location>
<url>https://aclanthology.org/2020.aespen-1.5</url>
</location>
<part>
<date>2020-05</date>
<extent unit="page">
<start>19</start>
<end>25</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Text Categorization for Conflict Event Annotation
%A Olsson, Fredrik
%A Sahlgren, Magnus
%A ben Abdesslem, Fehmi
%A Ekgren, Ariel
%A Eck, Kristine
%Y Hürriyetoğlu, Ali
%Y Yörük, Erdem
%Y Zavarella, Vanni
%Y Tanev, Hristo
%S Proceedings of the Workshop on Automated Extraction of Socio-political Events from News 2020
%D 2020
%8 May
%I European Language Resources Association (ELRA)
%C Marseille, France
%@ 979-10-95546-50-4
%G English
%F olsson-etal-2020-text
%X We cast the problem of event annotation as one of text categorization, and compare state of the art text categorization techniques on event data produced within the Uppsala Conflict Data Program (UCDP). Annotating a single text involves assigning the labels pertaining to at least 17 distinct categorization tasks, e.g., who was the attacking organization, who was attacked, and where did the event take place. The text categorization techniques under scrutiny are a classical Bag-of-Words approach; character-based contextualized embeddings produced by ELMo; embeddings produced by the BERT base model, and a version of BERT base fine-tuned on UCDP data; and a pre-trained and fine-tuned classifier based on ULMFiT. The categorization tasks are very diverse in terms of the number of classes to predict as well as the skewness of the distribution of classes. The categorization results exhibit a large variability across tasks, ranging from 30.3% to 99.8% F-score.
%U https://aclanthology.org/2020.aespen-1.5
%P 19-25
Markdown (Informal)
[Text Categorization for Conflict Event Annotation](https://aclanthology.org/2020.aespen-1.5) (Olsson et al., AESPEN 2020)
ACL
- Fredrik Olsson, Magnus Sahlgren, Fehmi ben Abdesslem, Ariel Ekgren, and Kristine Eck. 2020. Text Categorization for Conflict Event Annotation. In Proceedings of the Workshop on Automated Extraction of Socio-political Events from News 2020, pages 19–25, Marseille, France. European Language Resources Association (ELRA).