@inproceedings{chen-zhang-2025-retrieverguard,
title = "{R}etriever{G}uard: Empowering Information Retrieval to Combat {LLM}-Generated Misinformation",
author = "Chen, Chuwen and
Zhang, Shuai",
editor = "Chiruzzo, Luis and
Ritter, Alan and
Wang, Lu",
booktitle = "Findings of the Association for Computational Linguistics: NAACL 2025",
month = apr,
year = "2025",
address = "Albuquerque, New Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-naacl.249/",
doi = "10.18653/v1/2025.findings-naacl.249",
pages = "4399--4411",
ISBN = "979-8-89176-195-7",
abstract = "Large language models (LLMs) have demonstrated impressive capabilities in generating human-like text and have been shown to store factual knowledge within their extensive parameters. However, models like ChatGPT can still actively or passively generate false or misleading information, increasing the challenge of distinguishing between human-created and machine-generated content. This poses significant risks to the authenticity and reliability of digital communication. This work aims to enhance retrieval models' ability to identify the authenticity of texts generated by large language models, with the goal of improving the truthfulness of retrieved texts and reducing the harm of false information in the era of large models. Our contributions include: (1) we construct a diverse dataset of authentic human-authored texts and highly deceptive AI-generated texts from various domains; (2) we propose a self-supervised training method, RetrieverGuard, that enables the model to capture textual rules and styles of false information from the corpus without human-labelled data, achieving higher accuracy and robustness in identifying misleading and highly deceptive AI-generated content."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="chen-zhang-2025-retrieverguard">
<titleInfo>
<title>RetrieverGuard: Empowering Information Retrieval to Combat LLM-Generated Misinformation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Chuwen</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shuai</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: NAACL 2025</title>
</titleInfo>
<name type="personal">
<namePart type="given">Luis</namePart>
<namePart type="family">Chiruzzo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alan</namePart>
<namePart type="family">Ritter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lu</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Albuquerque, New Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-195-7</identifier>
</relatedItem>
<abstract>Large language models (LLMs) have demonstrated impressive capabilities in generating human-like text and have been shown to store factual knowledge within their extensive parameters. However, models like ChatGPT can still actively or passively generate false or misleading information, increasing the challenge of distinguishing between human-created and machine-generated content. This poses significant risks to the authenticity and reliability of digital communication. This work aims to enhance retrieval models’ ability to identify the authenticity of texts generated by large language models, with the goal of improving the truthfulness of retrieved texts and reducing the harm of false information in the era of large models. Our contributions include: (1) we construct a diverse dataset of authentic human-authored texts and highly deceptive AI-generated texts from various domains; (2) we propose a self-supervised training method, RetrieverGuard, that enables the model to capture textual rules and styles of false information from the corpus without human-labelled data, achieving higher accuracy and robustness in identifying misleading and highly deceptive AI-generated content.</abstract>
<identifier type="citekey">chen-zhang-2025-retrieverguard</identifier>
<identifier type="doi">10.18653/v1/2025.findings-naacl.249</identifier>
<location>
<url>https://aclanthology.org/2025.findings-naacl.249/</url>
</location>
<part>
<date>2025-04</date>
<extent unit="page">
<start>4399</start>
<end>4411</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T RetrieverGuard: Empowering Information Retrieval to Combat LLM-Generated Misinformation
%A Chen, Chuwen
%A Zhang, Shuai
%Y Chiruzzo, Luis
%Y Ritter, Alan
%Y Wang, Lu
%S Findings of the Association for Computational Linguistics: NAACL 2025
%D 2025
%8 April
%I Association for Computational Linguistics
%C Albuquerque, New Mexico
%@ 979-8-89176-195-7
%F chen-zhang-2025-retrieverguard
%X Large language models (LLMs) have demonstrated impressive capabilities in generating human-like text and have been shown to store factual knowledge within their extensive parameters. However, models like ChatGPT can still actively or passively generate false or misleading information, increasing the challenge of distinguishing between human-created and machine-generated content. This poses significant risks to the authenticity and reliability of digital communication. This work aims to enhance retrieval models’ ability to identify the authenticity of texts generated by large language models, with the goal of improving the truthfulness of retrieved texts and reducing the harm of false information in the era of large models. Our contributions include: (1) we construct a diverse dataset of authentic human-authored texts and highly deceptive AI-generated texts from various domains; (2) we propose a self-supervised training method, RetrieverGuard, that enables the model to capture textual rules and styles of false information from the corpus without human-labelled data, achieving higher accuracy and robustness in identifying misleading and highly deceptive AI-generated content.
%R 10.18653/v1/2025.findings-naacl.249
%U https://aclanthology.org/2025.findings-naacl.249/
%U https://doi.org/10.18653/v1/2025.findings-naacl.249
%P 4399-4411
Markdown (Informal)
[RetrieverGuard: Empowering Information Retrieval to Combat LLM-Generated Misinformation](https://aclanthology.org/2025.findings-naacl.249/) (Chen & Zhang, Findings 2025)
ACL