% Conference paper record (SemEval-2026 Task 13 system description).
% Case-sensitive words in title/booktitle are protected with whole-word braces
% so sentence-casing styles keep them intact.
% NOTE(review): "address" looks like the event location rather than the
% publisher's city -- confirm before mapping it to a biblatex location/venue field.
@inproceedings{s-etal-2026-pixel,
    title     = {Pixel Phantoms at {SemEval}-2026 Task 13: Exploring Classical and Neural Approaches for {AI}-Generated Code Detection},
    author    = {S, Jithu Morrison and
                 Hariharakrishnan, Janani and
                 S, Angel Deborah and
                 S, Rajalakshmi},
    editor    = {Kochmar, Ekaterina and
                 Ghosh, Debanjan and
                 North, Kai and
                 Komachi, Mamoru},
    booktitle = {Proceedings of the 20th {International} {Workshop} on {Semantic} {Evaluation} (2026)},
    month     = jul,
    year      = {2026},
    address   = {San Diego, California, USA},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2026.semeval-1.256/},
    pages     = {2040--2045},
    isbn      = {979-8-89176-414-9},
    abstract  = {This paper describes our system for SemEval-2026 Task 13, Subtask A: detecting whether a given code snippet is AI-generated or human-written. We explored a range of approaches from classical machine learning baselines using TF-IDF representations to fine-tuned transformer models pre-trained on code, specifically CodeBERT and GraphCodeBERT. Our experiments revealed a notable degradation in model performance when CodeBERT was trained beyond an optimal number of steps, indicating that continued training within an epoch leads to overfitting or representation drift. GraphCodeBERT, by contrast, yielded our best submission with a macro F1 score of 0.36866. Our findings highlight the sensitivity of code-specific transformers to training duration and suggest that early checkpoint selection is critical for this task.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record for the paper; the ID matches the citekey identifier below. -->
  <mods ID="s-etal-2026-pixel">
    <titleInfo>
      <title>Pixel Phantoms at SemEval-2026 Task 13: Exploring Classical and Neural Approaches for AI-Generated Code Detection</title>
    </titleInfo>
    <!-- Authors, in listed order. -->
    <name type="personal">
      <namePart type="given">Jithu</namePart>
      <namePart type="given">Morrison</namePart>
      <namePart type="family">S</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Janani</namePart>
      <namePart type="family">Hariharakrishnan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Angel</namePart>
      <namePart type="given">Deborah</namePart>
      <namePart type="family">S</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Rajalakshmi</namePart>
      <namePart type="family">S</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 20th International Workshop on Semantic Evaluation (2026)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Ekaterina</namePart>
        <namePart type="family">Kochmar</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Debanjan</namePart>
        <namePart type="family">Ghosh</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Kai</namePart>
        <namePart type="family">North</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mamoru</namePart>
        <namePart type="family">Komachi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, California, USA</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-414-9</identifier>
    </relatedItem>
    <abstract>This paper describes our system for SemEval-2026 Task 13, Subtask A: detecting whether a given code snippet is AI-generated or human-written. We explored a range of approaches from classical machine learning baselines using TF-IDF representations to fine-tuned transformer models pre-trained on code, specifically CodeBERT and GraphCodeBERT. Our experiments revealed a notable degradation in model performance when CodeBERT was trained beyond an optimal number of steps, indicating that continued training within an epoch leads to overfitting or representation drift. GraphCodeBERT, by contrast, yielded our best submission with a macro F1 score of 0.36866. Our findings highlight the sensitivity of code-specific transformers to training duration and suggest that early checkpoint selection is critical for this task.</abstract>
    <identifier type="citekey">s-etal-2026-pixel</identifier>
    <location>
      <url>https://aclanthology.org/2026.semeval-1.256/</url>
    </location>
    <!-- Date and page extent of the paper. -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>2040</start>
        <end>2045</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Pixel Phantoms at SemEval-2026 Task 13: Exploring Classical and Neural Approaches for AI-Generated Code Detection
%A S, Jithu Morrison
%A Hariharakrishnan, Janani
%A S, Angel Deborah
%A S, Rajalakshmi
%Y Kochmar, Ekaterina
%Y Ghosh, Debanjan
%Y North, Kai
%Y Komachi, Mamoru
%S Proceedings of the 20th International Workshop on Semantic Evaluation (2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, USA
%@ 979-8-89176-414-9
%F s-etal-2026-pixel
%X This paper describes our system for SemEval-2026 Task 13, Subtask A: detecting whether a given code snippet is AI-generated or human-written. We explored a range of approaches from classical machine learning baselines using TF-IDF representations to fine-tuned transformer models pre-trained on code, specifically CodeBERT and GraphCodeBERT. Our experiments revealed a notable degradation in model performance when CodeBERT was trained beyond an optimal number of steps, indicating that continued training within an epoch leads to overfitting or representation drift. GraphCodeBERT, by contrast, yielded our best submission with a macro F1 score of 0.36866. Our findings highlight the sensitivity of code-specific transformers to training duration and suggest that early checkpoint selection is critical for this task.
%U https://aclanthology.org/2026.semeval-1.256/
%P 2040-2045
Markdown (Informal)
[Pixel Phantoms at SemEval-2026 Task 13: Exploring Classical and Neural Approaches for AI-Generated Code Detection](https://aclanthology.org/2026.semeval-1.256/) (S et al., SemEval 2026)
ACL