@inproceedings{yadagiri-etal-2025-cnlp,
title = "{CNLP}-{NITS}-{PP} at {G}en{AI} Detection Task 1: {AI}-Generated Text Using Transformer-Based Approaches",
author = "Yadagiri, Annepaka and
Lekkala, Sai Teja and
Vardhan, Mandadoddi Srikar and
Pakray, Partha and
Krishna, Reddi Mohana",
editor = "Alam, Firoj and
Nakov, Preslav and
Habash, Nizar and
Gurevych, Iryna and
Chowdhury, Shammur and
Shelmanov, Artem and
Wang, Yuxia and
Artemova, Ekaterina and
Kutlu, Mucahid and
Mikros, George",
booktitle = "Proceedings of the 1stWorkshop on GenAI Content Detection (GenAIDetect)",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "International Conference on Computational Linguistics",
url = "https://aclanthology.org/2025.genaidetect-1.20/",
pages = "197--202",
abstract = "In the current digital landscape, distinguishing between text generated by humans and that created by large language models has become increasingly complex. This challenge is exacerbated by advanced LLMs such as the Gemini, ChatGPT, GPT-4, and LLaMa, which can produce highly sophisticated, human-like text. This indistinguishability introduces a range of challenges across different sectors. Cybersecurity increases the risk of social engineering and misinformation, while social media aids the spread of biased or false content. The educational sector faces issues of academic integrity, and within large, multi-team environments, these models add complexity to managing interactions between human and AI agents. To address these challenges, we approached the problem as a binary classification task using an English-language benchmark COLING dataset. We employed transformer-based neural network models, including BERT, DistilBERT, and RoBERTa, fine-tuning each model with optimized hyperparameters to maximize classification accuracy. Our team CNLP-NITS-PP has achieved the 23rd rank in subtask 1 at COLING-2025 for machine-generated text detection in English with a Main Score F1 Macro of 0.6502 and micro-F1 score of 0.6876."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="yadagiri-etal-2025-cnlp">
<titleInfo>
<title>CNLP-NITS-PP at GenAI Detection Task 1: AI-Generated Text Using Transformer-Based Approaches</title>
</titleInfo>
<name type="personal">
<namePart type="given">Annepaka</namePart>
<namePart type="family">Yadagiri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sai</namePart>
<namePart type="given">Teja</namePart>
<namePart type="family">Lekkala</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mandadoddi</namePart>
<namePart type="given">Srikar</namePart>
<namePart type="family">Vardhan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Partha</namePart>
<namePart type="family">Pakray</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Reddi</namePart>
<namePart type="given">Mohana</namePart>
<namePart type="family">Krishna</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1stWorkshop on GenAI Content Detection (GenAIDetect)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Firoj</namePart>
<namePart type="family">Alam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Preslav</namePart>
<namePart type="family">Nakov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nizar</namePart>
<namePart type="family">Habash</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Iryna</namePart>
<namePart type="family">Gurevych</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shammur</namePart>
<namePart type="family">Chowdhury</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Artem</namePart>
<namePart type="family">Shelmanov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuxia</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Artemova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mucahid</namePart>
<namePart type="family">Kutlu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">George</namePart>
<namePart type="family">Mikros</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>International Conference on Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In the current digital landscape, distinguishing between text generated by humans and that created by large language models has become increasingly complex. This challenge is exacerbated by advanced LLMs such as the Gemini, ChatGPT, GPT-4, and LLaMa, which can produce highly sophisticated, human-like text. This indistinguishability introduces a range of challenges across different sectors. Cybersecurity increases the risk of social engineering and misinformation, while social media aids the spread of biased or false content. The educational sector faces issues of academic integrity, and within large, multi-team environments, these models add complexity to managing interactions between human and AI agents. To address these challenges, we approached the problem as a binary classification task using an English-language benchmark COLING dataset. We employed transformer-based neural network models, including BERT, DistilBERT, and RoBERTa, fine-tuning each model with optimized hyperparameters to maximize classification accuracy. Our team CNLP-NITS-PP has achieved the 23rd rank in subtask 1 at COLING-2025 for machine-generated text detection in English with a Main Score F1 Macro of 0.6502 and micro-F1 score of 0.6876.</abstract>
<identifier type="citekey">yadagiri-etal-2025-cnlp</identifier>
<location>
<url>https://aclanthology.org/2025.genaidetect-1.20/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>197</start>
<end>202</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T CNLP-NITS-PP at GenAI Detection Task 1: AI-Generated Text Using Transformer-Based Approaches
%A Yadagiri, Annepaka
%A Lekkala, Sai Teja
%A Vardhan, Mandadoddi Srikar
%A Pakray, Partha
%A Krishna, Reddi Mohana
%Y Alam, Firoj
%Y Nakov, Preslav
%Y Habash, Nizar
%Y Gurevych, Iryna
%Y Chowdhury, Shammur
%Y Shelmanov, Artem
%Y Wang, Yuxia
%Y Artemova, Ekaterina
%Y Kutlu, Mucahid
%Y Mikros, George
%S Proceedings of the 1stWorkshop on GenAI Content Detection (GenAIDetect)
%D 2025
%8 January
%I International Conference on Computational Linguistics
%C Abu Dhabi, UAE
%F yadagiri-etal-2025-cnlp
%X In the current digital landscape, distinguishing between text generated by humans and that created by large language models has become increasingly complex. This challenge is exacerbated by advanced LLMs such as the Gemini, ChatGPT, GPT-4, and LLaMa, which can produce highly sophisticated, human-like text. This indistinguishability introduces a range of challenges across different sectors. Cybersecurity increases the risk of social engineering and misinformation, while social media aids the spread of biased or false content. The educational sector faces issues of academic integrity, and within large, multi-team environments, these models add complexity to managing interactions between human and AI agents. To address these challenges, we approached the problem as a binary classification task using an English-language benchmark COLING dataset. We employed transformer-based neural network models, including BERT, DistilBERT, and RoBERTa, fine-tuning each model with optimized hyperparameters to maximize classification accuracy. Our team CNLP-NITS-PP has achieved the 23rd rank in subtask 1 at COLING-2025 for machine-generated text detection in English with a Main Score F1 Macro of 0.6502 and micro-F1 score of 0.6876.
%U https://aclanthology.org/2025.genaidetect-1.20/
%P 197-202
Markdown (Informal)
[CNLP-NITS-PP at GenAI Detection Task 1: AI-Generated Text Using Transformer-Based Approaches](https://aclanthology.org/2025.genaidetect-1.20/) (Yadagiri et al., GenAIDetect 2025)
ACL