@inproceedings{abiola-etal-2025-cic-nlp,
title = "{CIC}-{NLP} at {G}en{AI} Detection Task 1: Leveraging {D}istil{BERT} for Detecting Machine-Generated Text in {E}nglish",
author = "Abiola, Tolulope Olalekan and
Bizuneh, Tewodros Achamaleh and
Abiola, Oluwatobi Joseph and
Oladepo, Temitope Olasunkanmi and
Ojo, Olumide Ebenezer and
Sidorov, Grigori and
Kolesnikova, Olga",
editor = "Alam, Firoj and
Nakov, Preslav and
Habash, Nizar and
Gurevych, Iryna and
Chowdhury, Shammur and
Shelmanov, Artem and
Wang, Yuxia and
Artemova, Ekaterina and
Kutlu, Mucahid and
Mikros, George",
booktitle = "Proceedings of the 1stWorkshop on GenAI Content Detection (GenAIDetect)",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "International Conference on Computational Linguistics",
url = "https://aclanthology.org/2025.genaidetect-1.29/",
pages = "271--277",
abstract = "As machine-generated texts (MGT) become increasingly similar to human writing, these dis- tinctions are harder to identify. In this paper, we as the CIC-NLP team present our submission to the Gen-AI Content Detection Workshop at COLING 2025 for Task 1 Subtask A, which involves distinguishing between text generated by LLMs and text authored by humans, with an emphasis on detecting English-only MGT. We applied the DistilBERT model to this binary classification task using the dataset provided by the organizers. Fine-tuning the model effectively differentiated between the classes, resulting in a micro-average F1-score of 0.70 on the evaluation test set. We provide a detailed explanation of the fine-tuning parameters and steps involved in our analysis."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="abiola-etal-2025-cic-nlp">
<titleInfo>
<title>CIC-NLP at GenAI Detection Task 1: Leveraging DistilBERT for Detecting Machine-Generated Text in English</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tolulope</namePart>
<namePart type="given">Olalekan</namePart>
<namePart type="family">Abiola</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tewodros</namePart>
<namePart type="given">Achamaleh</namePart>
<namePart type="family">Bizuneh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Oluwatobi</namePart>
<namePart type="given">Joseph</namePart>
<namePart type="family">Abiola</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Temitope</namePart>
<namePart type="given">Olasunkanmi</namePart>
<namePart type="family">Oladepo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Olumide</namePart>
<namePart type="given">Ebenezer</namePart>
<namePart type="family">Ojo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Grigori</namePart>
<namePart type="family">Sidorov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Olga</namePart>
<namePart type="family">Kolesnikova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1stWorkshop on GenAI Content Detection (GenAIDetect)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Firoj</namePart>
<namePart type="family">Alam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Preslav</namePart>
<namePart type="family">Nakov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nizar</namePart>
<namePart type="family">Habash</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Iryna</namePart>
<namePart type="family">Gurevych</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shammur</namePart>
<namePart type="family">Chowdhury</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Artem</namePart>
<namePart type="family">Shelmanov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuxia</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Artemova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mucahid</namePart>
<namePart type="family">Kutlu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">George</namePart>
<namePart type="family">Mikros</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>International Conference on Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>As machine-generated texts (MGT) become increasingly similar to human writing, these dis- tinctions are harder to identify. In this paper, we as the CIC-NLP team present our submission to the Gen-AI Content Detection Workshop at COLING 2025 for Task 1 Subtask A, which involves distinguishing between text generated by LLMs and text authored by humans, with an emphasis on detecting English-only MGT. We applied the DistilBERT model to this binary classification task using the dataset provided by the organizers. Fine-tuning the model effectively differentiated between the classes, resulting in a micro-average F1-score of 0.70 on the evaluation test set. We provide a detailed explanation of the fine-tuning parameters and steps involved in our analysis.</abstract>
<identifier type="citekey">abiola-etal-2025-cic-nlp</identifier>
<location>
<url>https://aclanthology.org/2025.genaidetect-1.29/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>271</start>
<end>277</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T CIC-NLP at GenAI Detection Task 1: Leveraging DistilBERT for Detecting Machine-Generated Text in English
%A Abiola, Tolulope Olalekan
%A Bizuneh, Tewodros Achamaleh
%A Abiola, Oluwatobi Joseph
%A Oladepo, Temitope Olasunkanmi
%A Ojo, Olumide Ebenezer
%A Sidorov, Grigori
%A Kolesnikova, Olga
%Y Alam, Firoj
%Y Nakov, Preslav
%Y Habash, Nizar
%Y Gurevych, Iryna
%Y Chowdhury, Shammur
%Y Shelmanov, Artem
%Y Wang, Yuxia
%Y Artemova, Ekaterina
%Y Kutlu, Mucahid
%Y Mikros, George
%S Proceedings of the 1stWorkshop on GenAI Content Detection (GenAIDetect)
%D 2025
%8 January
%I International Conference on Computational Linguistics
%C Abu Dhabi, UAE
%F abiola-etal-2025-cic-nlp
%X As machine-generated texts (MGT) become increasingly similar to human writing, these dis- tinctions are harder to identify. In this paper, we as the CIC-NLP team present our submission to the Gen-AI Content Detection Workshop at COLING 2025 for Task 1 Subtask A, which involves distinguishing between text generated by LLMs and text authored by humans, with an emphasis on detecting English-only MGT. We applied the DistilBERT model to this binary classification task using the dataset provided by the organizers. Fine-tuning the model effectively differentiated between the classes, resulting in a micro-average F1-score of 0.70 on the evaluation test set. We provide a detailed explanation of the fine-tuning parameters and steps involved in our analysis.
%U https://aclanthology.org/2025.genaidetect-1.29/
%P 271-277
Markdown (Informal)
[CIC-NLP at GenAI Detection Task 1: Leveraging DistilBERT for Detecting Machine-Generated Text in English](https://aclanthology.org/2025.genaidetect-1.29/) (Abiola et al., GenAIDetect 2025)
ACL