@inproceedings{naswan-etal-2026-astranet,
    title     = {{ASTraNet} at {SemEval}-2026 Task 13: Not All Code Looks the Same: Multi-View Structural and Semantic Detection of Machine-Generated Code},
    author    = {Naswan, Ruwad and
                 Saha, Dipit and
                 Kabir, Md. and
                 Tahseen, Nabiha},
    editor    = {Kochmar, Ekaterina and
                 Ghosh, Debanjan and
                 North, Kai and
                 Komachi, Mamoru},
    booktitle = {Proceedings of the 20th {International} {Workshop} on {Semantic} {Evaluation} (2026)},
    month     = jul,
    year      = {2026},
    address   = {San Diego, California, USA},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2026.semeval-1.402/},
    pages     = {3209--3217},
    isbn      = {979-8-89176-414-9},
    abstract  = {The growing adoption of large language models for code generation poses challenges for code quality, security, and authorship verification{---}particularly when test conditions involve unseen programming languages, generators, or application domains. We present our system, which combines three code-pretrained transformer encoders (CodeT5p-220M, CodeBERT, UniXcoder) with a structure-first Flow-Augmented AST (FA-AST) encoder implemented as a Gated Graph Neural Network. On Subtask A our best single model achieves macro F1 of 0.559; a post-competition layered rank-fusion ensemble across all three encoders raises this to 0.643. On Subtask C we obtain 0.585 officially; a three-stage ensemble combining neural probabilities with LightGBM-based features and class-priority routing raises this to 0.652. Our contributions include a language-agnostic structural detector, a diversity-driven rank-fusion strategy exploiting low inter-model correlation for binary classification, and a meta-learner stacking pipeline for multi-class detection under distribution shift.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for citekey naswan-etal-2026-astranet -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="naswan-etal-2026-astranet">
    <titleInfo>
      <title>ASTraNet at SemEval-2026 Task 13: Not All Code Looks the Same: Multi-View Structural and Semantic Detection of Machine-Generated Code</title>
    </titleInfo>
    <!-- Authors of the paper, in byline order -->
    <name type="personal">
      <namePart type="given">Ruwad</namePart>
      <namePart type="family">Naswan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Dipit</namePart>
      <namePart type="family">Saha</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Md.</namePart>
      <namePart type="family">Kabir</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Nabiha</namePart>
      <namePart type="family">Tahseen</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors and publisher -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 20th International Workshop on Semantic Evaluation (2026)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Ekaterina</namePart>
        <namePart type="family">Kochmar</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Debanjan</namePart>
        <namePart type="family">Ghosh</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Kai</namePart>
        <namePart type="family">North</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mamoru</namePart>
        <namePart type="family">Komachi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, California, USA</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-414-9</identifier>
    </relatedItem>
    <abstract>The growing adoption of large language models for code generation poses challenges for code quality, security, and authorship verification—particularly when test conditions involve unseen programming languages, generators, or application domains. We present our system, which combines three code-pretrained transformer encoders (CodeT5p-220M, CodeBERT, UniXcoder) with a structure-first Flow-Augmented AST (FA-AST) encoder implemented as a Gated Graph Neural Network. On Subtask A our best single model achieves macro F1 of 0.559; a post-competition layered rank-fusion ensemble across all three encoders raises this to 0.643. On Subtask C we obtain 0.585 officially; a three-stage ensemble combining neural probabilities with LightGBM-based features and class-priority routing raises this to 0.652. Our contributions include a language-agnostic structural detector, a diversity-driven rank-fusion strategy exploiting low inter-model correlation for binary classification, and a meta-learner stacking pipeline for multi-class detection under distribution shift.</abstract>
    <identifier type="citekey">naswan-etal-2026-astranet</identifier>
    <location>
      <url>https://aclanthology.org/2026.semeval-1.402/</url>
    </location>
    <!-- Page range of the paper within the host volume -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>3209</start>
        <end>3217</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T ASTraNet at SemEval-2026 Task 13: Not All Code Looks the Same: Multi-View Structural and Semantic Detection of Machine-Generated Code
%A Naswan, Ruwad
%A Saha, Dipit
%A Kabir, Md.
%A Tahseen, Nabiha
%Y Kochmar, Ekaterina
%Y Ghosh, Debanjan
%Y North, Kai
%Y Komachi, Mamoru
%S Proceedings of the 20th International Workshop on Semantic Evaluation (2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, USA
%@ 979-8-89176-414-9
%F naswan-etal-2026-astranet
%X The growing adoption of large language models for code generation poses challenges for code quality, security, and authorship verification—particularly when test conditions involve unseen programming languages, generators, or application domains. We present our system, which combines three code-pretrained transformer encoders (CodeT5p-220M, CodeBERT, UniXcoder) with a structure-first Flow-Augmented AST (FA-AST) encoder implemented as a Gated Graph Neural Network. On Subtask A our best single model achieves macro F1 of 0.559; a post-competition layered rank-fusion ensemble across all three encoders raises this to 0.643. On Subtask C we obtain 0.585 officially; a three-stage ensemble combining neural probabilities with LightGBM-based features and class-priority routing raises this to 0.652. Our contributions include a language-agnostic structural detector, a diversity-driven rank-fusion strategy exploiting low inter-model correlation for binary classification, and a meta-learner stacking pipeline for multi-class detection under distribution shift.
%U https://aclanthology.org/2026.semeval-1.402/
%P 3209-3217
Markdown (Informal)
[ASTraNet at SemEval-2026 Task 13: Not All Code Looks the Same: Multi-View Structural and Semantic Detection of Machine-Generated Code](https://aclanthology.org/2026.semeval-1.402/) (Naswan et al., SemEval 2026)
ACL