@inproceedings{qazi-etal-2025-retrieval,
title = "When retrieval outperforms generation: Dense evidence retrieval for scalable fake news detection",
author = "Qazi, Alamgir Munir and
McCrae, John Philip and
Nasir, Jamal",
editor = "Alam, Mehwish and
Tchechmedjiev, Andon and
Gracia, Jorge and
Gromann, Dagmar and
di Buono, Maria Pia and
Monti, Johanna and
Ionov, Maxim",
booktitle = "Proceedings of the 5th Conference on Language, Data and Knowledge",
month = sep,
year = "2025",
address = "Naples, Italy",
publisher = "Unior Press",
url = "https://aclanthology.org/2025.ldk-1.26/",
pages = "255--265",
ISBN = "978-88-6719-333-2",
abstract = "The proliferation of misinformation necessitates robust yet computationally efficient fact verification systems. While current state-of-the-art approaches leverage Large Language Models (LLMs) for generating explanatory rationales, these methods face significant computational barriers and hallucination risks in real-world deployments. We present DeReC (Dense Retrieval Classification), a lightweight framework that demonstrates how general-purpose text embeddings can effectively replace autoregressive LLM-based approaches in fact verification tasks. By combining dense retrieval with specialized classification, our system achieves better accuracy while being significantly more efficient. DeReC outperforms explanation-generating LLMs in efficiency, reducing runtime by 95{\%} on RAWFC (23 minutes 36 seconds compared to 454 minutes 12 seconds) and by 92{\%} on LIAR-RAW (134 minutes 14 seconds compared to 1692 minutes 23 seconds), showcasing its effectiveness across varying dataset sizes. On the RAWFC dataset, DeReC achieves an F1 score of 65.58{\%}, surpassing the state-of-the-art method L-Defense (61.20{\%}). Our results demonstrate that carefully engineered retrieval-based systems can match or exceed LLM performance in specialized tasks while being significantly more practical for real-world deployment."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="qazi-etal-2025-retrieval">
<titleInfo>
<title>When retrieval outperforms generation: Dense evidence retrieval for scalable fake news detection</title>
</titleInfo>
<name type="personal">
<namePart type="given">Alamgir</namePart>
<namePart type="given">Munir</namePart>
<namePart type="family">Qazi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">John</namePart>
<namePart type="given">Philip</namePart>
<namePart type="family">McCrae</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jamal</namePart>
<namePart type="family">Nasir</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 5th Conference on Language, Data and Knowledge</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mehwish</namePart>
<namePart type="family">Alam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andon</namePart>
<namePart type="family">Tchechmedjiev</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jorge</namePart>
<namePart type="family">Gracia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dagmar</namePart>
<namePart type="family">Gromann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="given">Pia</namePart>
<namePart type="family">di Buono</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Johanna</namePart>
<namePart type="family">Monti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maxim</namePart>
<namePart type="family">Ionov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Unior Press</publisher>
<place>
<placeTerm type="text">Naples, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">978-88-6719-333-2</identifier>
</relatedItem>
<abstract>The proliferation of misinformation necessitates robust yet computationally efficient fact verification systems. While current state-of-the-art approaches leverage Large Language Models (LLMs) for generating explanatory rationales, these methods face significant computational barriers and hallucination risks in real-world deployments. We present DeReC (Dense Retrieval Classification), a lightweight framework that demonstrates how general-purpose text embeddings can effectively replace autoregressive LLM-based approaches in fact verification tasks. By combining dense retrieval with specialized classification, our system achieves better accuracy while being significantly more efficient. DeReC outperforms explanation-generating LLMs in efficiency, reducing runtime by 95% on RAWFC (23 minutes 36 seconds compared to 454 minutes 12 seconds) and by 92% on LIAR-RAW (134 minutes 14 seconds compared to 1692 minutes 23 seconds), showcasing its effectiveness across varying dataset sizes. On the RAWFC dataset, DeReC achieves an F1 score of 65.58%, surpassing the state-of-the-art method L-Defense (61.20%). Our results demonstrate that carefully engineered retrieval-based systems can match or exceed LLM performance in specialized tasks while being significantly more practical for real-world deployment.</abstract>
<identifier type="citekey">qazi-etal-2025-retrieval</identifier>
<location>
<url>https://aclanthology.org/2025.ldk-1.26/</url>
</location>
<part>
<date>2025-09</date>
<extent unit="page">
<start>255</start>
<end>265</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T When retrieval outperforms generation: Dense evidence retrieval for scalable fake news detection
%A Qazi, Alamgir Munir
%A McCrae, John Philip
%A Nasir, Jamal
%Y Alam, Mehwish
%Y Tchechmedjiev, Andon
%Y Gracia, Jorge
%Y Gromann, Dagmar
%Y di Buono, Maria Pia
%Y Monti, Johanna
%Y Ionov, Maxim
%S Proceedings of the 5th Conference on Language, Data and Knowledge
%D 2025
%8 September
%I Unior Press
%C Naples, Italy
%@ 978-88-6719-333-2
%F qazi-etal-2025-retrieval
%X The proliferation of misinformation necessitates robust yet computationally efficient fact verification systems. While current state-of-the-art approaches leverage Large Language Models (LLMs) for generating explanatory rationales, these methods face significant computational barriers and hallucination risks in real-world deployments. We present DeReC (Dense Retrieval Classification), a lightweight framework that demonstrates how general-purpose text embeddings can effectively replace autoregressive LLM-based approaches in fact verification tasks. By combining dense retrieval with specialized classification, our system achieves better accuracy while being significantly more efficient. DeReC outperforms explanation-generating LLMs in efficiency, reducing runtime by 95% on RAWFC (23 minutes 36 seconds compared to 454 minutes 12 seconds) and by 92% on LIAR-RAW (134 minutes 14 seconds compared to 1692 minutes 23 seconds), showcasing its effectiveness across varying dataset sizes. On the RAWFC dataset, DeReC achieves an F1 score of 65.58%, surpassing the state-of-the-art method L-Defense (61.20%). Our results demonstrate that carefully engineered retrieval-based systems can match or exceed LLM performance in specialized tasks while being significantly more practical for real-world deployment.
%U https://aclanthology.org/2025.ldk-1.26/
%P 255-265
Markdown (Informal)
[When retrieval outperforms generation: Dense evidence retrieval for scalable fake news detection](https://aclanthology.org/2025.ldk-1.26/) (Qazi et al., LDK 2025)
ACL
Alamgir Munir Qazi, John Philip McCrae, and Jamal Nasir. 2025. When retrieval outperforms generation: Dense evidence retrieval for scalable fake news detection. In Proceedings of the 5th Conference on Language, Data and Knowledge, pages 255–265, Naples, Italy. Unior Press.
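
The abstract above describes a retrieve-then-classify architecture: general-purpose text embeddings select relevant evidence for a claim, and a specialized classifier predicts the verdict, with no autoregressive rationale generation. The sketch below illustrates that general pattern only and is not the authors' DeReC implementation; the embedding model (`all-MiniLM-L6-v2`), the logistic-regression classifier, the `top_k_evidence`/`featurize` helpers, and the feature construction are all assumptions chosen for illustration.

```python
# Hypothetical sketch of a retrieve-then-classify fact-verification pipeline.
# NOT the authors' DeReC code: encoder, classifier, and feature design are assumptions.
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.linear_model import LogisticRegression

encoder = SentenceTransformer("all-MiniLM-L6-v2")  # placeholder general-purpose embedder


def top_k_evidence(claim: str, evidence_pool: list[str], k: int = 5) -> list[str]:
    """Dense retrieval: rank evidence sentences by cosine similarity to the claim."""
    claim_vec = encoder.encode([claim], normalize_embeddings=True)        # shape (1, d)
    pool_vecs = encoder.encode(evidence_pool, normalize_embeddings=True)  # shape (n, d)
    scores = (pool_vecs @ claim_vec.T).ravel()  # cosine similarity (unit-norm vectors)
    top = np.argsort(-scores)[:k]
    return [evidence_pool[i] for i in top]


def featurize(claim: str, evidence: list[str]) -> np.ndarray:
    """Represent a (claim, retrieved evidence) pair as one fixed-length feature vector."""
    vecs = encoder.encode([claim] + evidence, normalize_embeddings=True)
    # Claim embedding concatenated with the mean of the retrieved-evidence embeddings.
    return np.concatenate([vecs[0], vecs[1:].mean(axis=0)])


def train_verdict_classifier(claims, evidence_pools, labels, k: int = 5):
    """Fit a lightweight classifier on (claim, evidence) features instead of
    prompting an LLM for explanatory rationales."""
    X = np.stack([featurize(c, top_k_evidence(c, pool, k))
                  for c, pool in zip(claims, evidence_pools)])
    return LogisticRegression(max_iter=1000).fit(X, labels)
```

At inference, the same featurize-then-predict path replaces rationale generation; under a design of this kind, skipping autoregressive decoding is what would account for the large runtime reductions reported in the abstract.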