@inproceedings{wiedemann-etal-2022-generalized,
title = "A Generalized Approach to Protest Event Detection in {G}erman Local News",
author = "Wiedemann, Gregor and
Dollbaum, Jan Matti and
Haunss, Sebastian and
Daphi, Priska and
Meier, Larissa Daria",
editor = "Calzolari, Nicoletta and
B{\'e}chet, Fr{\'e}d{\'e}ric and
Blache, Philippe and
Choukri, Khalid and
Cieri, Christopher and
Declerck, Thierry and
Goggi, Sara and
Isahara, Hitoshi and
Maegaard, Bente and
Mariani, Joseph and
Mazo, H{\'e}l{\`e}ne and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Thirteenth Language Resources and Evaluation Conference",
month = jun,
year = "2022",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2022.lrec-1.413",
pages = "3883--3891",
abstract = "Protest events provide information about social and political conflicts, the state of social cohesion and democratic conflict management, as well as the state of civil society in general. Social scientists are therefore interested in the systematic observation of protest events. With this paper, we release the first German language resource of protest event related article excerpts published in local news outlets. We use this dataset to train and evaluate transformer-based text classifiers to automatically detect relevant newspaper articles. Our best approach reaches a binary F1-score of 93.3 {\%}, which is a promising result for our goal to support political science research. However, in a second experiment, we show that our model does not generalize equally well when applied to data from time periods and localities other than our training sample. To make protest event detection more robust, we test two ways of alternative preprocessing. First, we find that letting the classifier concentrate on sentences around protest keywords improves the F1-score for out-of-sample data up to +4 percentage points. Second, against our initial intuition, masking of named entities during preprocessing does not improve the generalization in terms of F1-scores. However, it leads to a significantly improved recall of the models.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wiedemann-etal-2022-generalized">
<titleInfo>
<title>A Generalized Approach to Protest Event Detection in German Local News</title>
</titleInfo>
<name type="personal">
<namePart type="given">Gregor</namePart>
<namePart type="family">Wiedemann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="given">Matti</namePart>
<namePart type="family">Dollbaum</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastian</namePart>
<namePart type="family">Haunss</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Priska</namePart>
<namePart type="family">Daphi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Larissa</namePart>
<namePart type="given">Daria</namePart>
<namePart type="family">Meier</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Thirteenth Language Resources and Evaluation Conference</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Frédéric</namePart>
<namePart type="family">Béchet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philippe</namePart>
<namePart type="family">Blache</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christopher</namePart>
<namePart type="family">Cieri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Goggi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hitoshi</namePart>
<namePart type="family">Isahara</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hélène</namePart>
<namePart type="family">Mazo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Protest events provide information about social and political conflicts, the state of social cohesion and democratic conflict management, as well as the state of civil society in general. Social scientists are therefore interested in the systematic observation of protest events. With this paper, we release the first German language resource of protest event related article excerpts published in local news outlets. We use this dataset to train and evaluate transformer-based text classifiers to automatically detect relevant newspaper articles. Our best approach reaches a binary F1-score of 93.3 %, which is a promising result for our goal to support political science research. However, in a second experiment, we show that our model does not generalize equally well when applied to data from time periods and localities other than our training sample. To make protest event detection more robust, we test two ways of alternative preprocessing. First, we find that letting the classifier concentrate on sentences around protest keywords improves the F1-score for out-of-sample data up to +4 percentage points. Second, against our initial intuition, masking of named entities during preprocessing does not improve the generalization in terms of F1-scores. However, it leads to a significantly improved recall of the models.</abstract>
<identifier type="citekey">wiedemann-etal-2022-generalized</identifier>
<location>
<url>https://aclanthology.org/2022.lrec-1.413</url>
</location>
<part>
<date>2022-06</date>
<extent unit="page">
<start>3883</start>
<end>3891</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Generalized Approach to Protest Event Detection in German Local News
%A Wiedemann, Gregor
%A Dollbaum, Jan Matti
%A Haunss, Sebastian
%A Daphi, Priska
%A Meier, Larissa Daria
%Y Calzolari, Nicoletta
%Y Béchet, Frédéric
%Y Blache, Philippe
%Y Choukri, Khalid
%Y Cieri, Christopher
%Y Declerck, Thierry
%Y Goggi, Sara
%Y Isahara, Hitoshi
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Mazo, Hélène
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Thirteenth Language Resources and Evaluation Conference
%D 2022
%8 June
%I European Language Resources Association
%C Marseille, France
%F wiedemann-etal-2022-generalized
%X Protest events provide information about social and political conflicts, the state of social cohesion and democratic conflict management, as well as the state of civil society in general. Social scientists are therefore interested in the systematic observation of protest events. With this paper, we release the first German language resource of protest event related article excerpts published in local news outlets. We use this dataset to train and evaluate transformer-based text classifiers to automatically detect relevant newspaper articles. Our best approach reaches a binary F1-score of 93.3 %, which is a promising result for our goal to support political science research. However, in a second experiment, we show that our model does not generalize equally well when applied to data from time periods and localities other than our training sample. To make protest event detection more robust, we test two ways of alternative preprocessing. First, we find that letting the classifier concentrate on sentences around protest keywords improves the F1-score for out-of-sample data up to +4 percentage points. Second, against our initial intuition, masking of named entities during preprocessing does not improve the generalization in terms of F1-scores. However, it leads to a significantly improved recall of the models.
%U https://aclanthology.org/2022.lrec-1.413
%P 3883-3891
Markdown (Informal)
[A Generalized Approach to Protest Event Detection in German Local News](https://aclanthology.org/2022.lrec-1.413) (Wiedemann et al., LREC 2022)
ACL