@inproceedings{orel-etal-2026-aicd,
title = "{AICD} Bench: A Challenging Benchmark for {AI}-Generated Code Detection",
author = "Orel, Daniil and
Azizov, Dilshod and
Paul, Indraneil and
Wang, Yuxia and
Gurevych, Iryna and
Nakov, Preslav",
editor = "Demberg, Vera and
Inui, Kentaro and
Marquez, Llu{\'i}s",
booktitle = "Proceedings of the 19th Conference of the {E}uropean Chapter of the {A}ssociation for {C}omputational {L}inguistics (Volume 1: Long Papers)",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.eacl-long.325/",
pages = "6913--6938",
ISBN = "979-8-89176-380-7",
abstract = "Large language models (LLMs) are increasingly capable of generating functional source code, raising concerns about authorship, accountability, and security. While detecting AI-generated code is critical, existing datasets and benchmarks are narrow, typically limited to binary human{--}machine classification under in-distribution settings. To bridge this gap, we introduce AICD Bench, the most comprehensive benchmark for AI-generated code detection. It spans 2M examples, 77 models across 11 families, and 9 programming languages, including recent reasoning models. Beyond scale, AICD Bench introduces three realistic detection tasks: (i) Robust Binary Classification under distribution shifts in language and domain, (ii) Model Family Attribution, grouping generators by architectural lineage, and (iii) Fine-Grained Human{--}Machine Classification across human, machine, hybrid, and adversarial code. Extensive evaluation on neural and classical detectors shows that performance remains far below practical usability, particularly under distribution shift and for hybrid or adversarial code. We release AICD Bench as a unified, challenging evaluation suite to drive the next generation of robust approaches for AI-generated code detection. The data and the code are available at https://huggingface.co/AICD-bench."
}