% Conference paper from the RANLP 2025 proceedings (ACL Anthology ID 2025.ranlp-1.108).
% Note: `address` holds the location exactly as exported alongside the MODS/EndNote records below.
@inproceedings{perez-montero-etal-2025-detecting,
  title     = {Detecting Deception in Disinformation across Languages: The Role of Linguistic Markers},
  author    = {Perez-Montero, Alba and
               Gargova, Silvia and
               Lloret, Elena and
               Moreda Pozo, Paloma},
  editor    = {Angelova, Galia and
               Kunilovskaya, Maria and
               Escribe, Marie and
               Mitkov, Ruslan},
  booktitle = {Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative {AI} Era},
  month     = sep,
  year      = {2025},
  address   = {Varna, Bulgaria},
  publisher = {INCOMA Ltd., Shoumen, Bulgaria},
  url       = {https://aclanthology.org/2025.ranlp-1.108/},
  pages     = {943--952},
  abstract  = {The unstoppable proliferation of news driven by the rise of digital media has intensified the challenge of news verification. Natural Language Processing (NLP) offers solutions, primarily through content and context analysis. Recognizing the vital role of linguistic analysis, this paper presents a multilingual study of linguistic markers for automated deceptive fake news detection across English, Spanish, and Bulgarian. We compiled datasets in these languages to extract and analyze both general and specific linguistic markers. We then performed feature selection using the SelectKBest algorithm, applying it to various classification models with different combinations of general and specific linguistic markers. The results show that Logistic Regression and Support Vector Machine classification models achieved F1-scores above 0.8 for English and Spanish. For Bulgarian, Random Forest yielded the best results with an F1-score of 0.73. While these markers demonstrate potential for transferability to other languages, results may vary due to inherent linguistic characteristics. This necessitates further experimentation, especially in low-resource languages like Bulgarian. These findings highlight the significant potential of our dataset and linguistic markers for multilingual deceptive news detection.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="perez-montero-etal-2025-detecting">
<titleInfo>
<title>Detecting Deception in Disinformation across Languages: The Role of Linguistic Markers</title>
</titleInfo>
<name type="personal">
<namePart type="given">Alba</namePart>
<namePart type="family">Perez-Montero</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Silvia</namePart>
<namePart type="family">Gargova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elena</namePart>
<namePart type="family">Lloret</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Paloma</namePart>
<namePart type="family">Moreda Pozo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era</title>
</titleInfo>
<name type="personal">
<namePart type="given">Galia</namePart>
<namePart type="family">Angelova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Kunilovskaya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marie</namePart>
<namePart type="family">Escribe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd., Shoumen, Bulgaria</publisher>
<place>
<placeTerm type="text">Varna, Bulgaria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The unstoppable proliferation of news driven by the rise of digital media has intensified the challenge of news verification. Natural Language Processing (NLP) offers solutions, primarily through content and context analysis. Recognizing the vital role of linguistic analysis, this paper presents a multilingual study of linguistic markers for automated deceptive fake news detection across English, Spanish, and Bulgarian. We compiled datasets in these languages to extract and analyze both general and specific linguistic markers. We then performed feature selection using the SelectKBest algorithm, applying it to various classification models with different combinations of general and specific linguistic markers. The results show that Logistic Regression and Support Vector Machine classification models achieved F1-scores above 0.8 for English and Spanish. For Bulgarian, Random Forest yielded the best results with an F1-score of 0.73. While these markers demonstrate potential for transferability to other languages, results may vary due to inherent linguistic characteristics. This necessitates further experimentation, especially in low-resource languages like Bulgarian. These findings highlight the significant potential of our dataset and linguistic markers for multilingual deceptive news detection.</abstract>
<identifier type="citekey">perez-montero-etal-2025-detecting</identifier>
<location>
<url>https://aclanthology.org/2025.ranlp-1.108/</url>
</location>
<part>
<date>2025-09</date>
<extent unit="page">
<start>943</start>
<end>952</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Detecting Deception in Disinformation across Languages: The Role of Linguistic Markers
%A Perez-Montero, Alba
%A Gargova, Silvia
%A Lloret, Elena
%A Moreda Pozo, Paloma
%Y Angelova, Galia
%Y Kunilovskaya, Maria
%Y Escribe, Marie
%Y Mitkov, Ruslan
%S Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era
%D 2025
%8 September
%I INCOMA Ltd., Shoumen, Bulgaria
%C Varna, Bulgaria
%F perez-montero-etal-2025-detecting
%X The unstoppable proliferation of news driven by the rise of digital media has intensified the challenge of news verification. Natural Language Processing (NLP) offers solutions, primarily through content and context analysis. Recognizing the vital role of linguistic analysis, this paper presents a multilingual study of linguistic markers for automated deceptive fake news detection across English, Spanish, and Bulgarian. We compiled datasets in these languages to extract and analyze both general and specific linguistic markers. We then performed feature selection using the SelectKBest algorithm, applying it to various classification models with different combinations of general and specific linguistic markers. The results show that Logistic Regression and Support Vector Machine classification models achieved F1-scores above 0.8 for English and Spanish. For Bulgarian, Random Forest yielded the best results with an F1-score of 0.73. While these markers demonstrate potential for transferability to other languages, results may vary due to inherent linguistic characteristics. This necessitates further experimentation, especially in low-resource languages like Bulgarian. These findings highlight the significant potential of our dataset and linguistic markers for multilingual deceptive news detection.
%U https://aclanthology.org/2025.ranlp-1.108/
%P 943-952
Markdown (Informal)
[Detecting Deception in Disinformation across Languages: The Role of Linguistic Markers](https://aclanthology.org/2025.ranlp-1.108/) (Perez-Montero et al., RANLP 2025)
ACL