% BibTeX record for ACL Anthology paper W18-5502 (see the url and doi fields).
% Case-protected words in booktitle are braced as whole words, so case-changing
% styles keep the capitals without a brace group splitting a word in the middle.
% month uses the predefined bare macro so each style can render it itself.
@inproceedings{torabi-asr-taboada-2018-data,
    title     = {The Data Challenge in Misinformation Detection: Source Reputation vs. Content Veracity},
    author    = {Torabi Asr, Fatemeh and Taboada, Maite},
    editor    = {Thorne, James and
                 Vlachos, Andreas and
                 Cocarascu, Oana and
                 Christodoulopoulos, Christos and
                 Mittal, Arpit},
    booktitle = {Proceedings of the First Workshop on Fact Extraction and {VERification} ({FEVER})},
    month     = nov,
    year      = {2018},
    address   = {Brussels, Belgium},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/W18-5502},
    doi       = {10.18653/v1/W18-5502},
    pages     = {10--15},
    abstract  = {Misinformation detection at the level of full news articles is a text classification problem. Reliably labeled data in this domain is rare. Previous work relied on news articles collected from so-called {``}reputable{''} and {``}suspicious{''} websites and labeled accordingly. We leverage fact-checking websites to collect individually-labeled news articles with regard to the veracity of their content and use this data to test the cross-domain generalization of a classifier trained on bigger text collections but labeled according to source reputation. Our results suggest that reputation-based classification is not sufficient for predicting the veracity level of the majority of news articles, and that the system performance on different test datasets depends on topic distribution. Therefore collecting well-balanced and carefully-assessed training data is a priority for developing robust misinformation detection systems.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record, citekey torabi-asr-taboada-2018-data. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="torabi-asr-taboada-2018-data">

    <!-- The paper itself: title, then one name element per author. -->
    <titleInfo>
      <title>The Data Challenge in Misinformation Detection: Source Reputation vs. Content Veracity</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Fatemeh</namePart>
      <namePart type="family">Torabi Asr</namePart>
      <role><roleTerm authority="marcrelator" type="text">author</roleTerm></role>
    </name>
    <name type="personal">
      <namePart type="given">Maite</namePart>
      <namePart type="family">Taboada</namePart>
      <role><roleTerm authority="marcrelator" type="text">author</roleTerm></role>
    </name>
    <originInfo>
      <dateIssued>2018-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume, its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the First Workshop on Fact Extraction and VERification (FEVER)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">James</namePart>
        <namePart type="family">Thorne</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Andreas</namePart>
        <namePart type="family">Vlachos</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Oana</namePart>
        <namePart type="family">Cocarascu</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Christos</namePart>
        <namePart type="family">Christodoulopoulos</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Arpit</namePart>
        <namePart type="family">Mittal</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place><placeTerm type="text">Brussels, Belgium</placeTerm></place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>

    <abstract>Misinformation detection at the level of full news articles is a text classification problem. Reliably labeled data in this domain is rare. Previous work relied on news articles collected from so-called “reputable” and “suspicious” websites and labeled accordingly. We leverage fact-checking websites to collect individually-labeled news articles with regard to the veracity of their content and use this data to test the cross-domain generalization of a classifier trained on bigger text collections but labeled according to source reputation. Our results suggest that reputation-based classification is not sufficient for predicting the veracity level of the majority of news articles, and that the system performance on different test datasets depends on topic distribution. Therefore collecting well-balanced and carefully-assessed training data is a priority for developing robust misinformation detection systems.</abstract>

    <!-- Identifiers and the landing-page location. -->
    <identifier type="citekey">torabi-asr-taboada-2018-data</identifier>
    <identifier type="doi">10.18653/v1/W18-5502</identifier>
    <location>
      <url>https://aclanthology.org/W18-5502</url>
    </location>

    <!-- Position inside the host item: issue date and page extent. -->
    <part>
      <date>2018-11</date>
      <extent unit="page"><start>10</start><end>15</end></extent>
    </part>

  </mods>
</modsCollection>
%0 Conference Proceedings
%T The Data Challenge in Misinformation Detection: Source Reputation vs. Content Veracity
%A Torabi Asr, Fatemeh
%A Taboada, Maite
%Y Thorne, James
%Y Vlachos, Andreas
%Y Cocarascu, Oana
%Y Christodoulopoulos, Christos
%Y Mittal, Arpit
%S Proceedings of the First Workshop on Fact Extraction and VERification (FEVER)
%D 2018
%8 November
%I Association for Computational Linguistics
%C Brussels, Belgium
%F torabi-asr-taboada-2018-data
%X Misinformation detection at the level of full news articles is a text classification problem. Reliably labeled data in this domain is rare. Previous work relied on news articles collected from so-called “reputable” and “suspicious” websites and labeled accordingly. We leverage fact-checking websites to collect individually-labeled news articles with regard to the veracity of their content and use this data to test the cross-domain generalization of a classifier trained on bigger text collections but labeled according to source reputation. Our results suggest that reputation-based classification is not sufficient for predicting the veracity level of the majority of news articles, and that the system performance on different test datasets depends on topic distribution. Therefore collecting well-balanced and carefully-assessed training data is a priority for developing robust misinformation detection systems.
%R 10.18653/v1/W18-5502
%U https://aclanthology.org/W18-5502
%U https://doi.org/10.18653/v1/W18-5502
%P 10-15
Markdown (Informal)
[The Data Challenge in Misinformation Detection: Source Reputation vs. Content Veracity](https://aclanthology.org/W18-5502) (Torabi Asr & Taboada, EMNLP 2018)
ACL