@inproceedings{mattern-etal-2021-fang,
title = "{FANG}-{COVID}: A New Large-Scale Benchmark Dataset for Fake News Detection in {G}erman",
author = "Mattern, Justus and
Qiao, Yu and
Kerz, Elma and
Wiechmann, Daniel and
Strohmaier, Markus",
booktitle = "Proceedings of the Fourth Workshop on Fact Extraction and VERification (FEVER)",
month = nov,
year = "2021",
address = "Dominican Republic",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.fever-1.9",
doi = "10.18653/v1/2021.fever-1.9",
pages = "78--91",
abstract = "As the world continues to fight the COVID-19 pandemic, it is simultaneously fighting an {`}infodemic{'} {--} a flood of disinformation and spread of conspiracy theories leading to health threats and the division of society. To combat this infodemic, there is an urgent need for benchmark datasets that can help researchers develop and evaluate models geared towards automatic detection of disinformation. While there are increasing efforts to create adequate, open-source benchmark datasets for English, comparable resources are virtually unavailable for German, leaving research for the German language lagging significantly behind. In this paper, we introduce the new benchmark dataset FANG-COVID consisting of 28,056 real and 13,186 fake German news articles related to the COVID-19 pandemic as well as data on their propagation on Twitter. Furthermore, we propose an explainable textual- and social context-based model for fake news detection, compare its performance to {``}black-box{''} models and perform feature ablation to assess the relative importance of human-interpretable features in distinguishing fake news from authentic news.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mattern-etal-2021-fang">
<titleInfo>
<title>FANG-COVID: A New Large-Scale Benchmark Dataset for Fake News Detection in German</title>
</titleInfo>
<name type="personal">
<namePart type="given">Justus</namePart>
<namePart type="family">Mattern</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yu</namePart>
<namePart type="family">Qiao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elma</namePart>
<namePart type="family">Kerz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Wiechmann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Markus</namePart>
<namePart type="family">Strohmaier</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fourth Workshop on Fact Extraction and VERification (FEVER)</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dominican Republic</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>As the world continues to fight the COVID-19 pandemic, it is simultaneously fighting an ‘infodemic’ – a flood of disinformation and spread of conspiracy theories leading to health threats and the division of society. To combat this infodemic, there is an urgent need for benchmark datasets that can help researchers develop and evaluate models geared towards automatic detection of disinformation. While there are increasing efforts to create adequate, open-source benchmark datasets for English, comparable resources are virtually unavailable for German, leaving research for the German language lagging significantly behind. In this paper, we introduce the new benchmark dataset FANG-COVID consisting of 28,056 real and 13,186 fake German news articles related to the COVID-19 pandemic as well as data on their propagation on Twitter. Furthermore, we propose an explainable textual- and social context-based model for fake news detection, compare its performance to “black-box” models and perform feature ablation to assess the relative importance of human-interpretable features in distinguishing fake news from authentic news.</abstract>
<identifier type="citekey">mattern-etal-2021-fang</identifier>
<identifier type="doi">10.18653/v1/2021.fever-1.9</identifier>
<location>
<url>https://aclanthology.org/2021.fever-1.9</url>
</location>
<part>
<date>2021-11</date>
<extent unit="page">
<start>78</start>
<end>91</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T FANG-COVID: A New Large-Scale Benchmark Dataset for Fake News Detection in German
%A Mattern, Justus
%A Qiao, Yu
%A Kerz, Elma
%A Wiechmann, Daniel
%A Strohmaier, Markus
%S Proceedings of the Fourth Workshop on Fact Extraction and VERification (FEVER)
%D 2021
%8 November
%I Association for Computational Linguistics
%C Dominican Republic
%F mattern-etal-2021-fang
%X As the world continues to fight the COVID-19 pandemic, it is simultaneously fighting an ‘infodemic’ – a flood of disinformation and spread of conspiracy theories leading to health threats and the division of society. To combat this infodemic, there is an urgent need for benchmark datasets that can help researchers develop and evaluate models geared towards automatic detection of disinformation. While there are increasing efforts to create adequate, open-source benchmark datasets for English, comparable resources are virtually unavailable for German, leaving research for the German language lagging significantly behind. In this paper, we introduce the new benchmark dataset FANG-COVID consisting of 28,056 real and 13,186 fake German news articles related to the COVID-19 pandemic as well as data on their propagation on Twitter. Furthermore, we propose an explainable textual- and social context-based model for fake news detection, compare its performance to “black-box” models and perform feature ablation to assess the relative importance of human-interpretable features in distinguishing fake news from authentic news.
%R 10.18653/v1/2021.fever-1.9
%U https://aclanthology.org/2021.fever-1.9
%U https://doi.org/10.18653/v1/2021.fever-1.9
%P 78-91
Markdown (Informal)
[FANG-COVID: A New Large-Scale Benchmark Dataset for Fake News Detection in German](https://aclanthology.org/2021.fever-1.9) (Mattern et al., FEVER 2021)
ACL