@inproceedings{kiss-berend-2025-szegedai,
title = "{S}zeged{AI} at {G}en{AI} Detection Task 1: Beyond Binary - Soft-Voting Multi-Class Classification for Binary Machine-Generated Text Detection Across Diverse Language Models",
author = "Kiss, Mihaly and
Berend, G{\'a}bor",
editor = "Alam, Firoj and
Nakov, Preslav and
Habash, Nizar and
Gurevych, Iryna and
Chowdhury, Shammur and
Shelmanov, Artem and
Wang, Yuxia and
Artemova, Ekaterina and
Kutlu, Mucahid and
Mikros, George",
booktitle = "Proceedings of the 1stWorkshop on GenAI Content Detection (GenAIDetect)",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "International Conference on Computational Linguistics",
url = "https://aclanthology.org/2025.genaidetect-1.15/",
pages = "166--172",
abstract = "This paper describes the participation of the SzegedAI team in Subtask A of Task 1 at the COLING 2025 Workshop on Detecting AI-Generated Content. Our solutions investigate the effectiveness of combining multi-class approaches with ensemble methods for detecting machine-generated text. This approach groups models into multiple classes based on properties such as model size or generative capabilities. Additionally, we employ a length-based method, utilizing specialized expert models designed for specific text length ranges. During inference, we condense multi-class predictions into a binary outcome, categorizing any label other than human as AI-generated. The effectiveness of both standard and snapshot ensemble techniques is evaluated. Although not all multi-class configurations outperformed the binary setup, our findings indicate that the combination of multi-class training and ensemble methods can enhance performance over single-method or binary approaches."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kiss-berend-2025-szegedai">
<titleInfo>
<title>SzegedAI at GenAI Detection Task 1: Beyond Binary - Soft-Voting Multi-Class Classification for Binary Machine-Generated Text Detection Across Diverse Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mihaly</namePart>
<namePart type="family">Kiss</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gábor</namePart>
<namePart type="family">Berend</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1stWorkshop on GenAI Content Detection (GenAIDetect)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Firoj</namePart>
<namePart type="family">Alam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Preslav</namePart>
<namePart type="family">Nakov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nizar</namePart>
<namePart type="family">Habash</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Iryna</namePart>
<namePart type="family">Gurevych</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shammur</namePart>
<namePart type="family">Chowdhury</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Artem</namePart>
<namePart type="family">Shelmanov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuxia</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Artemova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mucahid</namePart>
<namePart type="family">Kutlu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">George</namePart>
<namePart type="family">Mikros</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>International Conference on Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper describes the participation of the SzegedAI team in Subtask A of Task 1 at the COLING 2025 Workshop on Detecting AI-Generated Content. Our solutions investigate the effectiveness of combining multi-class approaches with ensemble methods for detecting machine-generated text. This approach groups models into multiple classes based on properties such as model size or generative capabilities. Additionally, we employ a length-based method, utilizing specialized expert models designed for specific text length ranges. During inference, we condense multi-class predictions into a binary outcome, categorizing any label other than human as AI-generated. The effectiveness of both standard and snapshot ensemble techniques is evaluated. Although not all multi-class configurations outperformed the binary setup, our findings indicate that the combination of multi-class training and ensemble methods can enhance performance over single-method or binary approaches.</abstract>
<identifier type="citekey">kiss-berend-2025-szegedai</identifier>
<location>
<url>https://aclanthology.org/2025.genaidetect-1.15/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>166</start>
<end>172</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T SzegedAI at GenAI Detection Task 1: Beyond Binary - Soft-Voting Multi-Class Classification for Binary Machine-Generated Text Detection Across Diverse Language Models
%A Kiss, Mihaly
%A Berend, Gábor
%Y Alam, Firoj
%Y Nakov, Preslav
%Y Habash, Nizar
%Y Gurevych, Iryna
%Y Chowdhury, Shammur
%Y Shelmanov, Artem
%Y Wang, Yuxia
%Y Artemova, Ekaterina
%Y Kutlu, Mucahid
%Y Mikros, George
%S Proceedings of the 1stWorkshop on GenAI Content Detection (GenAIDetect)
%D 2025
%8 January
%I International Conference on Computational Linguistics
%C Abu Dhabi, UAE
%F kiss-berend-2025-szegedai
%X This paper describes the participation of the SzegedAI team in Subtask A of Task 1 at the COLING 2025 Workshop on Detecting AI-Generated Content. Our solutions investigate the effectiveness of combining multi-class approaches with ensemble methods for detecting machine-generated text. This approach groups models into multiple classes based on properties such as model size or generative capabilities. Additionally, we employ a length-based method, utilizing specialized expert models designed for specific text length ranges. During inference, we condense multi-class predictions into a binary outcome, categorizing any label other than human as AI-generated. The effectiveness of both standard and snapshot ensemble techniques is evaluated. Although not all multi-class configurations outperformed the binary setup, our findings indicate that the combination of multi-class training and ensemble methods can enhance performance over single-method or binary approaches.
%U https://aclanthology.org/2025.genaidetect-1.15/
%P 166-172
Markdown (Informal)
[SzegedAI at GenAI Detection Task 1: Beyond Binary - Soft-Voting Multi-Class Classification for Binary Machine-Generated Text Detection Across Diverse Language Models](https://aclanthology.org/2025.genaidetect-1.15/) (Kiss & Berend, GenAIDetect 2025)
ACL