@inproceedings{zhang-etal-2026-copyright,
title = "Copyright Detective: A Forensic System to Evidence {LLM}s Flickering Copyright Leakage Risks",
author = "Zhang, Guangwei and
Zhu, Jianing and
Qian, Cheng and
Gong, Neil Zhenqiang and
Mihalcea, Rada and
Xu, Zhaozhuo and
He, Jingrui and
Ma, Jiaqi W. and
Xiao, Chaowei and
Li, Bo and
Abbasi, Ahmed and
Lee, Dongwon and
Ji, Heng and
Zhang, Denghui",
editor = "Durrett, Greg and
Jian, Ping",
booktitle = "Proceedings of the 64th Annual Meeting of the {A}ssociation for {C}omputational {L}inguistics (Volume 3: System Demonstrations)",
month = jul,
year = "2026",
address = "San Diego, California, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.acl-demo.2/",
pages = "14--26",
ISBN = "979-8-89176-392-0",
abstract = "We present **Copyright Detective**, the first interactive forensic system for detecting, analyzing, and visualizing potential copyright risks in LLM outputs. The system treats copyright infringement versus compliance as an **evidence discovery** process rather than a static classification task due to the complex nature of copyright law. It integrates multiple detection paradigms, including content recall testing, paraphrase-level similarity analysis, persuasive jailbreak probing, and unlearning verification, within a unified and extensible framework. Through interactive prompting, response collection, and iterative workflows, our system enables systematic auditing of verbatim memorization and paraphrase-level leakage, supporting responsible deployment and transparent evaluation of LLM copyright risks even with black-box access. In our experiments with GPT-4o-mini, we demonstrate that the specific persuasive strategy ``Pathos'' shifts the leakage distribution from about 0.1 (ROUGE-L) to 0.7. Our live system is hosted on [Streamlit server](https://copyright-detective.streamlit.app), with a [demonstration video](https://youtu.be/z9Lh4kNDHiM) included as supplementary material."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhang-etal-2026-copyright">
<titleInfo>
<title>Copyright Detective: A Forensic System to Evidence LLMs Flickering Copyright Leakage Risks</title>
</titleInfo>
<name type="personal">
<namePart type="given">Guangwei</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jianing</namePart>
<namePart type="family">Zhu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cheng</namePart>
<namePart type="family">Qian</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Neil</namePart>
<namePart type="given">Zhenqiang</namePart>
<namePart type="family">Gong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rada</namePart>
<namePart type="family">Mihalcea</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhaozhuo</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jingrui</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiaqi</namePart>
<namePart type="given">W.</namePart>
<namePart type="family">Ma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chaowei</namePart>
<namePart type="family">Xiao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bo</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ahmed</namePart>
<namePart type="family">Abbasi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dongwon</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Heng</namePart>
<namePart type="family">Ji</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Denghui</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 3: System Demonstrations)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Greg</namePart>
<namePart type="family">Durrett</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ping</namePart>
<namePart type="family">Jian</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-392-0</identifier>
</relatedItem>
<abstract>We present **Copyright Detective**, the first interactive forensic system for detecting, analyzing, and visualizing potential copyright risks in LLM outputs. The system treats copyright infringement versus compliance as an **evidence discovery** process rather than a static classification task due to the complex nature of copyright law. It integrates multiple detection paradigms, including content recall testing, paraphrase-level similarity analysis, persuasive jailbreak probing, and unlearning verification, within a unified and extensible framework. Through interactive prompting, response collection, and iterative workflows, our system enables systematic auditing of verbatim memorization and paraphrase-level leakage, supporting responsible deployment and transparent evaluation of LLM copyright risks even with black-box access. In our experiments with GPT-4o-mini, we demonstrate that the specific persuasive strategy “Pathos” shifts the leakage distribution from about 0.1 (ROUGE-L) to 0.7. Our live system is hosted on [Streamlit server](https://copyright-detective.streamlit.app), with a [demonstration video](https://youtu.be/z9Lh4kNDHiM) included as supplementary material.</abstract>
<identifier type="citekey">zhang-etal-2026-copyright</identifier>
<location>
<url>https://aclanthology.org/2026.acl-demo.2/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>14</start>
<end>26</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Copyright Detective: A Forensic System to Evidence LLMs Flickering Copyright Leakage Risks
%A Zhang, Guangwei
%A Zhu, Jianing
%A Qian, Cheng
%A Gong, Neil Zhenqiang
%A Mihalcea, Rada
%A Xu, Zhaozhuo
%A He, Jingrui
%A Ma, Jiaqi W.
%A Xiao, Chaowei
%A Li, Bo
%A Abbasi, Ahmed
%A Lee, Dongwon
%A Ji, Heng
%A Zhang, Denghui
%Y Durrett, Greg
%Y Jian, Ping
%S Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 3: System Demonstrations)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-392-0
%F zhang-etal-2026-copyright
%X We present **Copyright Detective**, the first interactive forensic system for detecting, analyzing, and visualizing potential copyright risks in LLM outputs. The system treats copyright infringement versus compliance as an **evidence discovery** process rather than a static classification task due to the complex nature of copyright law. It integrates multiple detection paradigms, including content recall testing, paraphrase-level similarity analysis, persuasive jailbreak probing, and unlearning verification, within a unified and extensible framework. Through interactive prompting, response collection, and iterative workflows, our system enables systematic auditing of verbatim memorization and paraphrase-level leakage, supporting responsible deployment and transparent evaluation of LLM copyright risks even with black-box access. In our experiments with GPT-4o-mini, we demonstrate that the specific persuasive strategy “Pathos” shifts the leakage distribution from about 0.1 (ROUGE-L) to 0.7. Our live system is hosted on [Streamlit server](https://copyright-detective.streamlit.app), with a [demonstration video](https://youtu.be/z9Lh4kNDHiM) included as supplementary material.
%U https://aclanthology.org/2026.acl-demo.2/
%P 14-26
Markdown (Informal)
[Copyright Detective: A Forensic System to Evidence LLMs Flickering Copyright Leakage Risks](https://aclanthology.org/2026.acl-demo.2/) (Zhang et al., ACL 2026)
ACL
- Guangwei Zhang, Jianing Zhu, Cheng Qian, Neil Zhenqiang Gong, Rada Mihalcea, Zhaozhuo Xu, Jingrui He, Jiaqi W. Ma, Chaowei Xiao, Bo Li, Ahmed Abbasi, Dongwon Lee, Heng Ji, and Denghui Zhang. 2026. Copyright Detective: A Forensic System to Evidence LLMs Flickering Copyright Leakage Risks. In Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 3: System Demonstrations), pages 14–26, San Diego, California, United States. Association for Computational Linguistics.