@inproceedings{dugan-etal-2025-genai,
title = "{G}en{AI} Content Detection Task 3: Cross-Domain Machine Generated Text Detection Challenge",
author = "Dugan, Liam and
Zhu, Andrew and
Alam, Firoj and
Nakov, Preslav and
Apidianaki, Marianna and
Callison-Burch, Chris",
editor = "Alam, Firoj and
Nakov, Preslav and
Habash, Nizar and
Gurevych, Iryna and
Chowdhury, Shammur and
Shelmanov, Artem and
Wang, Yuxia and
Artemova, Ekaterina and
Kutlu, Mucahid and
Mikros, George",
booktitle = "Proceedings of the 1st Workshop on GenAI Content Detection (GenAIDetect)",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "International Conference on Computational Linguistics",
url = "https://aclanthology.org/2025.genaidetect-1.45/",
pages = "377--388",
abstract = "Recently there have been many shared tasks targeting the detection of generated text from Large Language Models (LLMs). However, these shared tasks tend to focus either on cases where text is limited to one particular domain or cases where text can be from many domains, some of which may not be seen during test time. In this shared task, using the newly released RAID benchmark, we aim to answer whether or not models can detect generated text from a large, yet fixed, number of domains and LLMs, all of which are seen during training. Over the course of three months, our task was attempted by 9 teams with 23 detector submissions. We find that multiple participants were able to obtain accuracies of over 99{\%} on machine-generated text from RAID while maintaining a 5{\%} False Positive Rate{---}suggesting that detectors are able to robustly detect text from many domains and models simultaneously. We discuss potential interpretations of this result and provide directions for future research."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="dugan-etal-2025-genai">
<titleInfo>
<title>GenAI Content Detection Task 3: Cross-Domain Machine Generated Text Detection Challenge</title>
</titleInfo>
<name type="personal">
<namePart type="given">Liam</namePart>
<namePart type="family">Dugan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andrew</namePart>
<namePart type="family">Zhu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Firoj</namePart>
<namePart type="family">Alam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Preslav</namePart>
<namePart type="family">Nakov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marianna</namePart>
<namePart type="family">Apidianaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chris</namePart>
<namePart type="family">Callison-Burch</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st Workshop on GenAI Content Detection (GenAIDetect)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Firoj</namePart>
<namePart type="family">Alam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Preslav</namePart>
<namePart type="family">Nakov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nizar</namePart>
<namePart type="family">Habash</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Iryna</namePart>
<namePart type="family">Gurevych</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shammur</namePart>
<namePart type="family">Chowdhury</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Artem</namePart>
<namePart type="family">Shelmanov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuxia</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Artemova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mucahid</namePart>
<namePart type="family">Kutlu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">George</namePart>
<namePart type="family">Mikros</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>International Conference on Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Recently there have been many shared tasks targeting the detection of generated text from Large Language Models (LLMs). However, these shared tasks tend to focus either on cases where text is limited to one particular domain or cases where text can be from many domains, some of which may not be seen during test time. In this shared task, using the newly released RAID benchmark, we aim to answer whether or not models can detect generated text from a large, yet fixed, number of domains and LLMs, all of which are seen during training. Over the course of three months, our task was attempted by 9 teams with 23 detector submissions. We find that multiple participants were able to obtain accuracies of over 99% on machine-generated text from RAID while maintaining a 5% False Positive Rate—suggesting that detectors are able to robustly detect text from many domains and models simultaneously. We discuss potential interpretations of this result and provide directions for future research.</abstract>
<identifier type="citekey">dugan-etal-2025-genai</identifier>
<location>
<url>https://aclanthology.org/2025.genaidetect-1.45/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>377</start>
<end>388</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T GenAI Content Detection Task 3: Cross-Domain Machine Generated Text Detection Challenge
%A Dugan, Liam
%A Zhu, Andrew
%A Alam, Firoj
%A Nakov, Preslav
%A Apidianaki, Marianna
%A Callison-Burch, Chris
%Y Alam, Firoj
%Y Nakov, Preslav
%Y Habash, Nizar
%Y Gurevych, Iryna
%Y Chowdhury, Shammur
%Y Shelmanov, Artem
%Y Wang, Yuxia
%Y Artemova, Ekaterina
%Y Kutlu, Mucahid
%Y Mikros, George
%S Proceedings of the 1st Workshop on GenAI Content Detection (GenAIDetect)
%D 2025
%8 January
%I International Conference on Computational Linguistics
%C Abu Dhabi, UAE
%F dugan-etal-2025-genai
%X Recently there have been many shared tasks targeting the detection of generated text from Large Language Models (LLMs). However, these shared tasks tend to focus either on cases where text is limited to one particular domain or cases where text can be from many domains, some of which may not be seen during test time. In this shared task, using the newly released RAID benchmark, we aim to answer whether or not models can detect generated text from a large, yet fixed, number of domains and LLMs, all of which are seen during training. Over the course of three months, our task was attempted by 9 teams with 23 detector submissions. We find that multiple participants were able to obtain accuracies of over 99% on machine-generated text from RAID while maintaining a 5% False Positive Rate—suggesting that detectors are able to robustly detect text from many domains and models simultaneously. We discuss potential interpretations of this result and provide directions for future research.
%U https://aclanthology.org/2025.genaidetect-1.45/
%P 377-388
Markdown (Informal)
[GenAI Content Detection Task 3: Cross-Domain Machine Generated Text Detection Challenge](https://aclanthology.org/2025.genaidetect-1.45/) (Dugan et al., GenAIDetect 2025)
ACL