@inproceedings{jiang-etal-2026-leveraging,
title = "Leveraging Human and Machine Preferences for Zero-shot Detection of {AI}-Generated Text",
author = "Jiang, Lei and
Wu, Desheng and
Zheng, Xiaolong and
Luo, Cuicui",
editor = "Liakata, Maria and
Moreira, Viviane P. and
Zhang, Jiajun and
Jurgens, David",
booktitle = "Findings of the {A}ssociation for {C}omputational {L}inguistics: {ACL} 2026",
month = jul,
year = "2026",
address = "San Diego, California, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.findings-acl.671/",
pages = "13732--13750",
ISBN = "979-8-89176-395-1",
abstract = "In recent years, the rapid advancement of large language models (LLMs) has enabled generated texts to closely mimic human writing, posing significant challenges to the detection of AI-generated content. Current mainstream zero-shot detection methods largely adopt a machine-centric perspective, relying on proxy models to compute token-level AI-likelihood scores and treating all tokens equally during overall detection. However, such approaches overlook the prediction discrepancies that arise when humans and large language models interpret the same text. We argue that tokens exhibiting greater divergence between human and machine predictions can provide stronger clues for determining the authorship of a text. To address this limitation, we propose \textbf{HAPDA}{---}a human-machine prediction discrepancy adapter for AI-generated text detection (AGTD). The framework consists of two core components: (1) a joint fine-tuning strategy for training paired human-preference and machine-preference models, and (2) a discrepancy-aware reweighting mechanism designed to calibrate token-level detection scores in downstream detectors. Extensive experiments demonstrate that HAPDA consistently and significantly enhances the detection performance of five representative baseline models under various evaluation scenarios."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="jiang-etal-2026-leveraging">
<titleInfo>
<title>Leveraging Human and Machine Preferences for Zero-shot Detection of AI-Generated Text</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lei</namePart>
<namePart type="family">Jiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Desheng</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaolong</namePart>
<namePart type="family">Zheng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cuicui</namePart>
<namePart type="family">Luo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2026</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Liakata</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Viviane</namePart>
<namePart type="given">P.</namePart>
<namePart type="family">Moreira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiajun</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Jurgens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-395-1</identifier>
</relatedItem>
<abstract>In recent years, the rapid advancement of large language models (LLMs) has enabled generated texts to closely mimic human writing, posing significant challenges to the detection of AI-generated content. Current mainstream zero-shot detection methods largely adopt a machine-centric perspective, relying on proxy models to compute token-level AI-likelihood scores and treating all tokens equally during overall detection. However, such approaches overlook the prediction discrepancies that arise when humans and large language models interpret the same text. We argue that tokens exhibiting greater divergence between human and machine predictions can provide stronger clues for determining the authorship of a text. To address this limitation, we propose HAPDA—a human-machine prediction discrepancy adapter for AI-generated text detection (AGTD). The framework consists of two core components: (1) a joint fine-tuning strategy for training paired human-preference and machine-preference models, and (2) a discrepancy-aware reweighting mechanism designed to calibrate token-level detection scores in downstream detectors. Extensive experiments demonstrate that HAPDA consistently and significantly enhances the detection performance of five representative baseline models under various evaluation scenarios.</abstract>
<identifier type="citekey">jiang-etal-2026-leveraging</identifier>
<location>
<url>https://aclanthology.org/2026.findings-acl.671/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>13732</start>
<end>13750</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Leveraging Human and Machine Preferences for Zero-shot Detection of AI-Generated Text
%A Jiang, Lei
%A Wu, Desheng
%A Zheng, Xiaolong
%A Luo, Cuicui
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Findings of the Association for Computational Linguistics: ACL 2026
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-395-1
%F jiang-etal-2026-leveraging
%X In recent years, the rapid advancement of large language models (LLMs) has enabled generated texts to closely mimic human writing, posing significant challenges to the detection of AI-generated content. Current mainstream zero-shot detection methods largely adopt a machine-centric perspective, relying on proxy models to compute token-level AI-likelihood scores and treating all tokens equally during overall detection. However, such approaches overlook the prediction discrepancies that arise when humans and large language models interpret the same text. We argue that tokens exhibiting greater divergence between human and machine predictions can provide stronger clues for determining the authorship of a text. To address this limitation, we propose HAPDA—a human-machine prediction discrepancy adapter for AI-generated text detection (AGTD). The framework consists of two core components: (1) a joint fine-tuning strategy for training paired human-preference and machine-preference models, and (2) a discrepancy-aware reweighting mechanism designed to calibrate token-level detection scores in downstream detectors. Extensive experiments demonstrate that HAPDA consistently and significantly enhances the detection performance of five representative baseline models under various evaluation scenarios.
%U https://aclanthology.org/2026.findings-acl.671/
%P 13732-13750
Markdown (Informal)
[Leveraging Human and Machine Preferences for Zero-shot Detection of AI-Generated Text](https://aclanthology.org/2026.findings-acl.671/) (Jiang et al., Findings 2026)
ACL