% Conference paper record; the canonical landing page is given in the url field.
% Case-sensitive words in title/booktitle are braced as whole words so that
% sentence-casing bibliography styles keep their capitalisation.
@inproceedings{valdez-etal-2026-late,
  title     = {{LATE-IIMAS} at {Semeval-2026} Task 13: Evaluating {GNNs}, {PLMs}, {LLMs}, and Stylometry for Automatic Code Identification},
  author    = {Valdez, Andric and
               Ancona, Emmanuel and
               Bernardino, Sebasti{\'a}n and
               Gomez-Adorno, Helena and
               Balouchzahi, Fazlourrahman and
               Herrera, Fabian},
  editor    = {Kochmar, Ekaterina and
               Ghosh, Debanjan and
               North, Kai and
               Komachi, Mamoru},
  booktitle = {Proceedings of the 20th {International} {Workshop} on {Semantic} {Evaluation} (2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.semeval-1.339/},
  pages     = {2689--2696},
  isbn      = {979-8-89176-414-9},
  abstract  = {The generation of source code via Artificial Intelligence has become a prevalent practice in both academia and industry, posing significant challenges to academic integrity and authorship attribution. In this work, we address SemEval-2026 Task 13: Detecting Machine-Generated Code by evaluating the effectiveness of four distinct methodologies: Graph Neural Networks (GNNs), Pre-trained Language Models (PLMs), Large Language Models (LLMs), and Stylometric Feature Engineering using XGBoost. Our approach focuses on three specific scenarios: Subtask A (Binary Detection), Subtask B (Multi-Class Authorship), and Subtask C (Hybrid Code Detection). While our models achieved high performance during the validation phase, the transition to the final test set revealed substantial challenges in generalization, likely due to the increased diversity of programming languages and generators in the unseen data. This work serves as a foundational first step, identifying critical gaps in model robustness and highlighting the need for more sophisticated methodologies to bridge the performance gap in complex, real-world environments.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding one record (citekey valdez-etal-2026-late):
     six authors on the paper, four editors on the host proceedings. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="valdez-etal-2026-late">
    <titleInfo>
      <title>LATE-IIMAS at Semeval-2026 Task 13: Evaluating GNNs, PLMs, LLMs, and Stylometry for Automatic Code Identification</title>
    </titleInfo>
    <!-- Authors, in publication order -->
    <name type="personal">
      <namePart type="given">Andric</namePart>
      <namePart type="family">Valdez</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Emmanuel</namePart>
      <namePart type="family">Ancona</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sebastián</namePart>
      <namePart type="family">Bernardino</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Helena</namePart>
      <namePart type="family">Gomez-Adorno</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Fazlourrahman</namePart>
      <namePart type="family">Balouchzahi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Fabian</namePart>
      <namePart type="family">Herrera</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors and publisher -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 20th International Workshop on Semantic Evaluation (2026)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Ekaterina</namePart>
        <namePart type="family">Kochmar</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Debanjan</namePart>
        <namePart type="family">Ghosh</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Kai</namePart>
        <namePart type="family">North</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mamoru</namePart>
        <namePart type="family">Komachi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, California, USA</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-414-9</identifier>
    </relatedItem>
    <abstract>The generation of source code via Artificial Intelligence has become a prevalent practice in both academia and industry, posing significant challenges to academic integrity and authorship attribution. In this work, we address SemEval-2026 Task 13: Detecting Machine-Generated Code by evaluating the effectiveness of four distinct methodologies: Graph Neural Networks (GNNs), Pre-trained Language Models (PLMs), Large Language Models (LLMs), and Stylometric Feature Engineering using XGBoost. Our approach focuses on three specific scenarios: Subtask A (Binary Detection), Subtask B (Multi-Class Authorship), and Subtask C (Hybrid Code Detection). While our models achieved high performance during the validation phase, the transition to the final test set revealed substantial challenges in generalization, likely due to the increased diversity of programming languages and generators in the unseen data. This work serves as a foundational first step, identifying critical gaps in model robustness and highlighting the need for more sophisticated methodologies to bridge the performance gap in complex, real-world environments.</abstract>
    <identifier type="citekey">valdez-etal-2026-late</identifier>
    <location>
      <url>https://aclanthology.org/2026.semeval-1.339/</url>
    </location>
    <!-- Position of the paper within the host volume -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>2689</start>
        <end>2696</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T LATE-IIMAS at Semeval-2026 Task 13: Evaluating GNNs, PLMs, LLMs, and Stylometry for Automatic Code Identification
%A Valdez, Andric
%A Ancona, Emmanuel
%A Bernardino, Sebastián
%A Gomez-Adorno, Helena
%A Balouchzahi, Fazlourrahman
%A Herrera, Fabian
%Y Kochmar, Ekaterina
%Y Ghosh, Debanjan
%Y North, Kai
%Y Komachi, Mamoru
%S Proceedings of the 20th International Workshop on Semantic Evaluation (2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, USA
%@ 979-8-89176-414-9
%F valdez-etal-2026-late
%X The generation of source code via Artificial Intelligence has become a prevalent practice in both academia and industry, posing significant challenges to academic integrity and authorship attribution. In this work, we address SemEval-2026 Task 13: Detecting Machine-Generated Code by evaluating the effectiveness of four distinct methodologies: Graph Neural Networks (GNNs), Pre-trained Language Models (PLMs), Large Language Models (LLMs), and Stylometric Feature Engineering using XGBoost. Our approach focuses on three specific scenarios: Subtask A (Binary Detection), Subtask B (Multi-Class Authorship), and Subtask C (Hybrid Code Detection). While our models achieved high performance during the validation phase, the transition to the final test set revealed substantial challenges in generalization, likely due to the increased diversity of programming languages and generators in the unseen data. This work serves as a foundational first step, identifying critical gaps in model robustness and highlighting the need for more sophisticated methodologies to bridge the performance gap in complex, real-world environments.
%U https://aclanthology.org/2026.semeval-1.339/
%P 2689-2696
Markdown (Informal)
[LATE-IIMAS at Semeval-2026 Task 13: Evaluating GNNs, PLMs, LLMs, and Stylometry for Automatic Code Identification](https://aclanthology.org/2026.semeval-1.339/) (Valdez et al., SemEval 2026)
ACL
- Andric Valdez, Emmanuel Ancona, Sebastián Bernardino, Helena Gomez-Adorno, Fazlourrahman Balouchzahi, and Fabian Herrera. 2026. LATE-IIMAS at Semeval-2026 Task 13: Evaluating GNNs, PLMs, LLMs, and Stylometry for Automatic Code Identification. In Proceedings of the 20th International Workshop on Semantic Evaluation (2026), pages 2689–2696, San Diego, California, USA. Association for Computational Linguistics.