% Conference paper record (shared-task system description, RANLP 2025 M-DAIGT volume).
% NOTE(review): `address` appears to hold the conference venue, while the publisher's
% city is embedded in `publisher`; both are kept as exported so this entry stays
% consistent with the MODS and EndNote records of the same paper. Confirm before normalising.
@inproceedings{yadagiri-etal-2025-ai,
  title     = {{AI}-Generated Text Detection Using {DeBERTa} with Auxiliary Stylometric Features},
  author    = {Yadagiri, Annepaka and
               Sai Teja, L. D. M. S. and
               Pakray, Partha and
               Chunka, Chukhu},
  editor    = {Lamsiyah, Salima and
               Ezzini, Saad and
               El Mahdaoui, Abdelkader and
               Alami, Hamza and
               Benlahbib, Abdessamad and
               El Amrani, Samir and
               Chafik, Salmane and
               Hammouchi, Hicham},
  booktitle = {Proceedings of the Shared Task on Multi-Domain Detection of AI-Generated Text},
  month     = sep,
  year      = {2025},
  address   = {Varna, Bulgaria},
  publisher = {INCOMA Ltd., Shoumen, Bulgaria},
  url       = {https://aclanthology.org/2025.ranlp-mdaigt.2/},
  pages     = {10--14},
  abstract  = {The global proliferation of Generative Artificial Intelligence (GenAI) has led to the increasing presence of AI-generated text across a wide spectrum of topics, ranging from everyday content to critical and specialized domains. Often, individuals are unaware that the text they interact with was produced by AI systems rather than human authors, leading to instances where AI-generated content is unintentionally combined with human-written material. In response to this growing concern, we propose a novel approach as part of the Multi-Domain AI-Generated Text Detection (M-DAIGT) shared task, which aims to accurately identify AI-generated content across multiple domains, particularly in news reporting and academic writing. Given the rapid evolution of large language models (LLMs), distinguishing between human-authored and AI-generated text has become increasingly challenging. To address this, our method employs fine-tuning strategies using transformer-based language models for binary text classification. We focus on two specific domains, news and scholarly writing, and demonstrate that our approach, based on the DeBERTa transformer model, achieves superior performance in identifying AI-generated text. Our team, CNLP-NITS-PP, achieved 5th position in Subtask 1 and 3rd position in Subtask 2.}
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for citekey yadagiri-etal-2025-ai: paper metadata, then the host proceedings volume. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="yadagiri-etal-2025-ai">
    <titleInfo>
      <title>AI-Generated Text Detection Using DeBERTa with Auxiliary Stylometric Features</title>
    </titleInfo>
    <!-- Paper authors, in byline order -->
    <name type="personal">
      <namePart type="given">Annepaka</namePart>
      <namePart type="family">Yadagiri</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">L</namePart>
      <namePart type="given">D</namePart>
      <namePart type="given">M</namePart>
      <namePart type="given">S</namePart>
      <namePart type="family">Sai Teja</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Partha</namePart>
      <namePart type="family">Pakray</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Chukhu</namePart>
      <namePart type="family">Chunka</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-09</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors, publisher and place -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Shared Task on Multi-Domain Detection of AI-Generated Text</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Salima</namePart>
        <namePart type="family">Lamsiyah</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Saad</namePart>
        <namePart type="family">Ezzini</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Abdelkader</namePart>
        <namePart type="family">El Mahdaoui</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Hamza</namePart>
        <namePart type="family">Alami</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Abdessamad</namePart>
        <namePart type="family">Benlahbib</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Samir</namePart>
        <namePart type="family">El Amrani</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Salmane</namePart>
        <namePart type="family">Chafik</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Hicham</namePart>
        <namePart type="family">Hammouchi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>INCOMA Ltd., Shoumen, Bulgaria</publisher>
        <place>
          <placeTerm type="text">Varna, Bulgaria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>The global proliferation of Generative Artificial Intelligence (GenAI) has led to the increasing presence of AI-generated text across a wide spectrum of topics, ranging from everyday content to critical and specialized domains. Often, individuals are unaware that the text they interact with was produced by AI systems rather than human authors, leading to instances where AI-generated content is unintentionally combined with human-written material. In response to this growing concern, we propose a novel approach as part of the Multi-Domain AI-Generated Text Detection (M-DAIGT) shared task, which aims to accurately identify AI-generated content across multiple domains, particularly in news reporting and academic writing. Given the rapid evolution of large language models (LLMs), distinguishing between human-authored and AI-generated text has become increasingly challenging. To address this, our method employs fine-tuning strategies using transformer-based language models for binary text classification. We focus on two specific domains, news and scholarly writing, and demonstrate that our approach, based on the DeBERTa transformer model, achieves superior performance in identifying AI-generated text. Our team, CNLP-NITS-PP, achieved 5th position in Subtask 1 and 3rd position in Subtask 2.</abstract>
    <identifier type="citekey">yadagiri-etal-2025-ai</identifier>
    <location>
      <url>https://aclanthology.org/2025.ranlp-mdaigt.2/</url>
    </location>
    <!-- Position of the paper inside the host volume -->
    <part>
      <date>2025-09</date>
      <extent unit="page">
        <start>10</start>
        <end>14</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T AI-Generated Text Detection Using DeBERTa with Auxiliary Stylometric Features
%A Yadagiri, Annepaka
%A Sai Teja, L. D. M. S.
%A Pakray, Partha
%A Chunka, Chukhu
%Y Lamsiyah, Salima
%Y Ezzini, Saad
%Y El Mahdaoui, Abdelkader
%Y Alami, Hamza
%Y Benlahbib, Abdessamad
%Y El Amrani, Samir
%Y Chafik, Salmane
%Y Hammouchi, Hicham
%S Proceedings of the Shared Task on Multi-Domain Detection of AI-Generated Text
%D 2025
%8 September
%I INCOMA Ltd., Shoumen, Bulgaria
%C Varna, Bulgaria
%F yadagiri-etal-2025-ai
%X The global proliferation of Generative Artificial Intelligence (GenAI) has led to the increasing presence of AI-generated text across a wide spectrum of topics, ranging from everyday content to critical and specialized domains. Often, individuals are unaware that the text they interact with was produced by AI systems rather than human authors, leading to instances where AI-generated content is unintentionally combined with human-written material. In response to this growing concern, we propose a novel approach as part of the Multi-Domain AI-Generated Text Detection (M-DAIGT) shared task, which aims to accurately identify AI-generated content across multiple domains, particularly in news reporting and academic writing. Given the rapid evolution of large language models (LLMs), distinguishing between human-authored and AI-generated text has become increasingly challenging. To address this, our method employs fine-tuning strategies using transformer-based language models for binary text classification. We focus on two specific domains, news and scholarly writing, and demonstrate that our approach, based on the DeBERTa transformer model, achieves superior performance in identifying AI-generated text. Our team, CNLP-NITS-PP, achieved 5th position in Subtask 1 and 3rd position in Subtask 2.
%U https://aclanthology.org/2025.ranlp-mdaigt.2/
%P 10-14
Markdown (Informal)
[AI-Generated Text Detection Using DeBERTa with Auxiliary Stylometric Features](https://aclanthology.org/2025.ranlp-mdaigt.2/) (Yadagiri et al., RANLP 2025)
ACL