@inproceedings{zhang-etal-2025-aiguard,
title = "{AIG}uard: A Benchmark and Lightweight Detection for {E}-commerce {AIGC} Risks",
author = "Zhang, Wenhua and
Li, Weicheng and
Rao, Xuanrong and
Zou, Lixin and
Luo, Xiangyang and
Zhuang, Chubin and
Hong, Yongjie and
Qin, Zhen and
Chang, Hengyu and
Li, Chenliang and
Zheng, Bo",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2025",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-acl.643/",
doi = "10.18653/v1/2025.findings-acl.643",
pages = "12437--12450",
ISBN = "979-8-89176-256-5",
abstract = "Recent advancements in AI-generated content (AIGC) have heightened concerns about harmful outputs, such as misinformation and malicious misuse. Existing detection methods face two key limitations: (1) lacking real-world AIGC scenarios and corresponding risk datasets, and (2) both traditional and multimodal large language models (MLLMs) struggle to detect risks in AIGC. Towards this end, we introduce **AIGuard**, the first benchmark for AIGC risk detection in real-world e-commerce. It includes 253,420 image-text pairs (i.e., the risk content and risk description) across four critical categories: *abnormal body*, *violating physical laws*, *misleading or illogical context*, and *harmful or problematic message*. To effectively detect these risks, we propose distilling text annotations into dense soft prompts and identifying risk content through image soft prompt matching during inference. Experiments on the benchmark show that this method achieves a 9.68{\%} higher recall than leading multimodal models while using only 25{\%} of the training resources and improving inference speed by 37.8 times. For further research, our benchmark and code are available at [https://github.com/wenh-zhang/aiguard-dataset](https://github.com/wenh-zhang/aiguard-dataset)."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhang-etal-2025-aiguard">
<titleInfo>
<title>AIGuard: A Benchmark and Lightweight Detection for E-commerce AIGC Risks</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wenhua</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Weicheng</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xuanrong</namePart>
<namePart type="family">Rao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lixin</namePart>
<namePart type="family">Zou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiangyang</namePart>
<namePart type="family">Luo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chubin</namePart>
<namePart type="family">Zhuang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yongjie</namePart>
<namePart type="family">Hong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhen</namePart>
<namePart type="family">Qin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hengyu</namePart>
<namePart type="family">Chang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chenliang</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bo</namePart>
<namePart type="family">Zheng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2025</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Nabende</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Taher</namePart>
<namePart type="family">Pilehvar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-256-5</identifier>
</relatedItem>
<abstract>Recent advancements in AI-generated content (AIGC) have heightened concerns about harmful outputs, such as misinformation and malicious misuse. Existing detection methods face two key limitations: (1) lacking real-world AIGC scenarios and corresponding risk datasets, and (2) both traditional and multimodal large language models (MLLMs) struggle to detect risks in AIGC. Towards this end, we introduce **AIGuard**, the first benchmark for AIGC risk detection in real-world e-commerce. It includes 253,420 image-text pairs (i.e., the risk content and risk description) across four critical categories: *abnormal body*, *violating physical laws*, *misleading or illogical context*, and *harmful or problematic message*. To effectively detect these risks, we propose distilling text annotations into dense soft prompts and identifying risk content through image soft prompt matching during inference. Experiments on the benchmark show that this method achieves a 9.68% higher recall than leading multimodal models while using only 25% of the training resources and improving inference speed by 37.8 times. For further research, our benchmark and code are available at [https://github.com/wenh-zhang/aiguard-dataset](https://github.com/wenh-zhang/aiguard-dataset).</abstract>
<identifier type="citekey">zhang-etal-2025-aiguard</identifier>
<identifier type="doi">10.18653/v1/2025.findings-acl.643</identifier>
<location>
<url>https://aclanthology.org/2025.findings-acl.643/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>12437</start>
<end>12450</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T AIGuard: A Benchmark and Lightweight Detection for E-commerce AIGC Risks
%A Zhang, Wenhua
%A Li, Weicheng
%A Rao, Xuanrong
%A Zou, Lixin
%A Luo, Xiangyang
%A Zhuang, Chubin
%A Hong, Yongjie
%A Qin, Zhen
%A Chang, Hengyu
%A Li, Chenliang
%A Zheng, Bo
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Findings of the Association for Computational Linguistics: ACL 2025
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-256-5
%F zhang-etal-2025-aiguard
%X Recent advancements in AI-generated content (AIGC) have heightened concerns about harmful outputs, such as misinformation and malicious misuse. Existing detection methods face two key limitations: (1) lacking real-world AIGC scenarios and corresponding risk datasets, and (2) both traditional and multimodal large language models (MLLMs) struggle to detect risks in AIGC. Towards this end, we introduce **AIGuard**, the first benchmark for AIGC risk detection in real-world e-commerce. It includes 253,420 image-text pairs (i.e., the risk content and risk description) across four critical categories: *abnormal body*, *violating physical laws*, *misleading or illogical context*, and *harmful or problematic message*. To effectively detect these risks, we propose distilling text annotations into dense soft prompts and identifying risk content through image soft prompt matching during inference. Experiments on the benchmark show that this method achieves a 9.68% higher recall than leading multimodal models while using only 25% of the training resources and improving inference speed by 37.8 times. For further research, our benchmark and code are available at [https://github.com/wenh-zhang/aiguard-dataset](https://github.com/wenh-zhang/aiguard-dataset).
%R 10.18653/v1/2025.findings-acl.643
%U https://aclanthology.org/2025.findings-acl.643/
%U https://doi.org/10.18653/v1/2025.findings-acl.643
%P 12437-12450
Markdown (Informal)
[AIGuard: A Benchmark and Lightweight Detection for E-commerce AIGC Risks](https://aclanthology.org/2025.findings-acl.643/) (Zhang et al., Findings 2025)
ACL
- Wenhua Zhang, Weicheng Li, Xuanrong Rao, Lixin Zou, Xiangyang Luo, Chubin Zhuang, Yongjie Hong, Zhen Qin, Hengyu Chang, Chenliang Li, and Bo Zheng. 2025. AIGuard: A Benchmark and Lightweight Detection for E-commerce AIGC Risks. In Findings of the Association for Computational Linguistics: ACL 2025, pages 12437–12450, Vienna, Austria. Association for Computational Linguistics.