@inproceedings{bohacek-etal-2023-czech,
title = "{C}zech-ing the News: Article Trustworthiness Dataset for {C}zech",
author = "Bohacek, Matyas and
Bravansky, Michal and
Trhl{\'\i}k, Filip and
Moravec, Vaclav",
editor = "Barnes, Jeremy and
De Clercq, Orph{\'e}e and
Klinger, Roman",
booktitle = "Proceedings of the 13th Workshop on Computational Approaches to Subjectivity, Sentiment, {\&} Social Media Analysis",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.wassa-1.10",
doi = "10.18653/v1/2023.wassa-1.10",
pages = "96--109",
abstract = "We present the Verifee dataset: a multimodal dataset of news articles with fine-grained trustworthiness annotations. We bring a diverse set of researchers from social, media, and computer sciences aboard to study this interdisciplinary problem holistically and develop a detailed methodology that assesses the texts through the lens of editorial transparency, journalist conventions, and objective reporting while penalizing manipulative techniques. We collect over 10,000 annotated articles from 60 Czech online news sources. Each item is categorized into one of the 4 proposed classes on the credibility spectrum {--} ranging from entirely trustworthy articles to deceptive ones {--} and annotated of its manipulative attributes. We fine-tune prominent sequence-to-sequence language models for the trustworthiness classification task on our dataset and report the best F-1 score of 0.53. We open-source the dataset, annotation methodology, and annotators{'} instructions in full length at \url{https://www.verifee.ai/research/} to enable easy build-up work.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bohacek-etal-2023-czech">
<titleInfo>
<title>Czech-ing the News: Article Trustworthiness Dataset for Czech</title>
</titleInfo>
<name type="personal">
<namePart type="given">Matyas</namePart>
<namePart type="family">Bohacek</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michal</namePart>
<namePart type="family">Bravansky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Filip</namePart>
<namePart type="family">Trhlík</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vaclav</namePart>
<namePart type="family">Moravec</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 13th Workshop on Computational Approaches to Subjectivity, Sentiment, & Social Media Analysis</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jeremy</namePart>
<namePart type="family">Barnes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Orphée</namePart>
<namePart type="family">De Clercq</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Roman</namePart>
<namePart type="family">Klinger</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present the Verifee dataset: a multimodal dataset of news articles with fine-grained trustworthiness annotations. We bring a diverse set of researchers from social, media, and computer sciences aboard to study this interdisciplinary problem holistically and develop a detailed methodology that assesses the texts through the lens of editorial transparency, journalist conventions, and objective reporting while penalizing manipulative techniques. We collect over 10,000 annotated articles from 60 Czech online news sources. Each item is categorized into one of the 4 proposed classes on the credibility spectrum – ranging from entirely trustworthy articles to deceptive ones – and annotated of its manipulative attributes. We fine-tune prominent sequence-to-sequence language models for the trustworthiness classification task on our dataset and report the best F-1 score of 0.53. We open-source the dataset, annotation methodology, and annotators’ instructions in full length at https://www.verifee.ai/research/ to enable easy build-up work.</abstract>
<identifier type="citekey">bohacek-etal-2023-czech</identifier>
<identifier type="doi">10.18653/v1/2023.wassa-1.10</identifier>
<location>
<url>https://aclanthology.org/2023.wassa-1.10</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>96</start>
<end>109</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Czech-ing the News: Article Trustworthiness Dataset for Czech
%A Bohacek, Matyas
%A Bravansky, Michal
%A Trhlík, Filip
%A Moravec, Vaclav
%Y Barnes, Jeremy
%Y De Clercq, Orphée
%Y Klinger, Roman
%S Proceedings of the 13th Workshop on Computational Approaches to Subjectivity, Sentiment, & Social Media Analysis
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F bohacek-etal-2023-czech
%X We present the Verifee dataset: a multimodal dataset of news articles with fine-grained trustworthiness annotations. We bring a diverse set of researchers from social, media, and computer sciences aboard to study this interdisciplinary problem holistically and develop a detailed methodology that assesses the texts through the lens of editorial transparency, journalist conventions, and objective reporting while penalizing manipulative techniques. We collect over 10,000 annotated articles from 60 Czech online news sources. Each item is categorized into one of the 4 proposed classes on the credibility spectrum – ranging from entirely trustworthy articles to deceptive ones – and annotated of its manipulative attributes. We fine-tune prominent sequence-to-sequence language models for the trustworthiness classification task on our dataset and report the best F-1 score of 0.53. We open-source the dataset, annotation methodology, and annotators’ instructions in full length at https://www.verifee.ai/research/ to enable easy build-up work.
%R 10.18653/v1/2023.wassa-1.10
%U https://aclanthology.org/2023.wassa-1.10
%U https://doi.org/10.18653/v1/2023.wassa-1.10
%P 96-109
Markdown (Informal)
[Czech-ing the News: Article Trustworthiness Dataset for Czech](https://aclanthology.org/2023.wassa-1.10) (Bohacek et al., WASSA 2023)
ACL
- Matyas Bohacek, Michal Bravansky, Filip Trhlík, and Vaclav Moravec. 2023. Czech-ing the News: Article Trustworthiness Dataset for Czech. In Proceedings of the 13th Workshop on Computational Approaches to Subjectivity, Sentiment, & Social Media Analysis, pages 96–109, Toronto, Canada. Association for Computational Linguistics.