% Citation record for the SemEval-2026 Task 13 paper hosted at aclanthology.org (2026.semeval-1.172).
% Case-sensitive words are protected with whole-word braces so sentence-casing styles keep them intact.
@inproceedings{santhanam-etal-2026-stylometry,
    title     = {Stylometry at {SemEval}-2026 Task 13: Clustered Stylometric Modeling for Machine-Generated Code Detection},
    author    = {Santhanam, Sruthi and
                 Sarkar, Parthib and
                 Sharma, Yashvardhan},
    editor    = {Kochmar, Ekaterina and
                 Ghosh, Debanjan and
                 North, Kai and
                 Komachi, Mamoru},
    booktitle = {Proceedings of the 20th {International} {Workshop} on {Semantic} {Evaluation} (2026)},
    month     = jul,
    year      = {2026},
    address   = {San Diego, California, USA},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2026.semeval-1.172/},
    pages     = {1319--1325},
    isbn      = {979-8-89176-414-9},
    abstract  = {Machine-generated code detection is examined under out-of-distribution conditions where robust generalization is required. A hybrid feature representation is used in which code snippets are encoded through character-level TF{--}IDF patterns together with explicit structural indicators capturing properties such as verbosity and formatting behavior. Variability across generators is handled through clustering-based expert specialization, and predictions are produced using an ensemble of logistic regression and Na{\"i}ve Bayes models with calibrated thresholds. Experimental results show that the proposed approach performs competitively despite relying on simple linear classifiers. The findings suggest that persistent structural patterns in code provide reliable cross-domain signals for identifying machine-generated programs.}
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding one record: the paper (authors, abstract, pages) and its host proceedings (editors, publisher, ISBN). -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="santhanam-etal-2026-stylometry">
    <!-- Paper-level description -->
    <titleInfo>
      <title>Stylometry at SemEval-2026 Task 13: Clustered Stylometric Modeling for Machine-Generated Code Detection</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Sruthi</namePart>
      <namePart type="family">Santhanam</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Parthib</namePart>
      <namePart type="family">Sarkar</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yashvardhan</namePart>
      <namePart type="family">Sharma</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume containing the paper -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 20th International Workshop on Semantic Evaluation (2026)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Ekaterina</namePart>
        <namePart type="family">Kochmar</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Debanjan</namePart>
        <namePart type="family">Ghosh</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Kai</namePart>
        <namePart type="family">North</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mamoru</namePart>
        <namePart type="family">Komachi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, California, USA</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-414-9</identifier>
    </relatedItem>
    <abstract>Machine-generated code detection is examined under out-of-distribution conditions where robust generalization is required. A hybrid feature representation is used in which code snippets are encoded through character-level TF–IDF patterns together with explicit structural indicators capturing properties such as verbosity and formatting behavior. Variability across generators is handled through clustering-based expert specialization, and predictions are produced using an ensemble of logistic regression and Naïve Bayes models with calibrated thresholds. Experimental results show that the proposed approach performs competitively despite relying on simple linear classifiers. The findings suggest that persistent structural patterns in code provide reliable cross-domain signals for identifying machine-generated programs.</abstract>
    <identifier type="citekey">santhanam-etal-2026-stylometry</identifier>
    <location>
      <url>https://aclanthology.org/2026.semeval-1.172/</url>
    </location>
    <!-- Position of the paper within the host volume -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>1319</start>
        <end>1325</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Stylometry at SemEval-2026 Task 13: Clustered Stylometric Modeling for Machine-Generated Code Detection
%A Santhanam, Sruthi
%A Sarkar, Parthib
%A Sharma, Yashvardhan
%Y Kochmar, Ekaterina
%Y Ghosh, Debanjan
%Y North, Kai
%Y Komachi, Mamoru
%S Proceedings of the 20th International Workshop on Semantic Evaluation (2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, USA
%@ 979-8-89176-414-9
%F santhanam-etal-2026-stylometry
%X Machine-generated code detection is examined under out-of-distribution conditions where robust generalization is required. A hybrid feature representation is used in which code snippets are encoded through character-level TF–IDF patterns together with explicit structural indicators capturing properties such as verbosity and formatting behavior. Variability across generators is handled through clustering-based expert specialization, and predictions are produced using an ensemble of logistic regression and Naïve Bayes models with calibrated thresholds. Experimental results show that the proposed approach performs competitively despite relying on simple linear classifiers. The findings suggest that persistent structural patterns in code provide reliable cross-domain signals for identifying machine-generated programs.
%U https://aclanthology.org/2026.semeval-1.172/
%P 1319-1325
Markdown (Informal)
[Stylometry at SemEval-2026 Task 13: Clustered Stylometric Modeling for Machine-Generated Code Detection](https://aclanthology.org/2026.semeval-1.172/) (Santhanam et al., SemEval 2026)
ACL