@inproceedings{sai-teja-etal-2026-codedet,
title = "{C}ode{D}et-{NITS} at {S}em{E}val-2026 Task 13: {AI} Code Authorship Detection Beyond Truncation",
author = "Sai Teja, Lekkala and
Yadagiri, Annepaka and
Patiyal, Kshitij and
Sai Anish, Sangam and
Pakray, Partha",
editor = "Kochmar, Ekaterina and
Ghosh, Debanjan and
North, Kai and
Komachi, Mamoru",
booktitle = "Proceedings of the 20th {I}nternational {W}orkshop on {S}emantic {E}valuation (2026)",
month = jul,
year = "2026",
address = "San Diego, California, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.semeval-1.397/",
pages = "3165--3168",
ISBN = "979-8-89176-414-9",
abstract = "Automatically determining whether source code is human-written or produced by a specific family of large language models is becoming essential for reliable assessment, provenance tracking, and dataset curation. We present a lightweight yet competitive system for SemEval-2026 Task 13 Subtask B, which requires attributing each snippet to one of eleven classes: human or one of ten LLM families. Our method repurposes code-oriented instruction-tuned backbones from the Qwen2.5-Coder series as sequence classifiers and adapts them using QLoRA, combining frozen low-precision weights with low-rank trainable adapters to reduce memory and compute overhead. The core design choice addresses long snippets without losing evidence. Instead of truncating to a fixed context, we apply an overlapping sliding-window strategy that expands long examples into multiple fixed-length windows during training, all sharing the same label. For validation and test, windows are generated on the fly and their evidence is aggregated by averaging logits to yield a single prediction per snippet, enabling token-complete use of the input while keeping inference stable. Our final submission ranked 8th on the official Subtask B test set leaderboard."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sai-teja-etal-2026-codedet">
<titleInfo>
<title>CodeDet-NITS at SemEval-2026 Task 13: AI Code Authorship Detection Beyond Truncation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lekkala</namePart>
<namePart type="family">Sai Teja</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Annepaka</namePart>
<namePart type="family">Yadagiri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kshitij</namePart>
<namePart type="family">Patiyal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sangam</namePart>
<namePart type="family">Sai Anish</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Partha</namePart>
<namePart type="family">Pakray</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 20th International Workshop on Semantic Evaluation (2026)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Kochmar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Debanjan</namePart>
<namePart type="family">Ghosh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kai</namePart>
<namePart type="family">North</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mamoru</namePart>
<namePart type="family">Komachi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-414-9</identifier>
</relatedItem>
<abstract>Automatically determining whether source code is human-written or produced by a specific family of large language models is becoming essential for reliable assessment, provenance tracking, and dataset curation. We present a lightweight yet competitive system for SemEval-2026 Task 13 Subtask B, which requires attributing each snippet to one of eleven classes: human or one of ten LLM families. Our method repurposes code-oriented instruction-tuned backbones from the Qwen2.5-Coder series as sequence classifiers and adapts them using QLoRA, combining frozen low-precision weights with low-rank trainable adapters to reduce memory and compute overhead. The core design choice addresses long snippets without losing evidence. Instead of truncating to a fixed context, we apply an overlapping sliding-window strategy that expands long examples into multiple fixed-length windows during training, all sharing the same label. For validation and test, windows are generated on the fly and their evidence is aggregated by averaging logits to yield a single prediction per snippet, enabling token-complete use of the input while keeping inference stable. Our final submission ranked 8th on the official Subtask B test set leaderboard.</abstract>
<identifier type="citekey">sai-teja-etal-2026-codedet</identifier>
<location>
<url>https://aclanthology.org/2026.semeval-1.397/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>3165</start>
<end>3168</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T CodeDet-NITS at SemEval-2026 Task 13: AI Code Authorship Detection Beyond Truncation
%A Sai Teja, Lekkala
%A Yadagiri, Annepaka
%A Patiyal, Kshitij
%A Sai Anish, Sangam
%A Pakray, Partha
%Y Kochmar, Ekaterina
%Y Ghosh, Debanjan
%Y North, Kai
%Y Komachi, Mamoru
%S Proceedings of the 20th International Workshop on Semantic Evaluation (2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, USA
%@ 979-8-89176-414-9
%F sai-teja-etal-2026-codedet
%X Automatically determining whether source code is human-written or produced by a specific family of large language models is becoming essential for reliable assessment, provenance tracking, and dataset curation. We present a lightweight yet competitive system for SemEval-2026 Task 13 Subtask B, which requires attributing each snippet to one of eleven classes: human or one of ten LLM families. Our method repurposes code-oriented instruction-tuned backbones from the Qwen2.5-Coder series as sequence classifiers and adapts them using QLoRA, combining frozen low-precision weights with low-rank trainable adapters to reduce memory and compute overhead. The core design choice addresses long snippets without losing evidence. Instead of truncating to a fixed context, we apply an overlapping sliding-window strategy that expands long examples into multiple fixed-length windows during training, all sharing the same label. For validation and test, windows are generated on the fly and their evidence is aggregated by averaging logits to yield a single prediction per snippet, enabling token-complete use of the input while keeping inference stable. Our final submission ranked 8th on the official Subtask B test set leaderboard.
%U https://aclanthology.org/2026.semeval-1.397/
%P 3165-3168
Markdown (Informal)
[CodeDet-NITS at SemEval-2026 Task 13: AI Code Authorship Detection Beyond Truncation](https://aclanthology.org/2026.semeval-1.397/) (Sai Teja et al., SemEval 2026)
ACL