@inproceedings{jiao-etal-2025-cmi,
title = "{CMI}-{AIGCX} at {G}en{AI} Detection Task 2: Leveraging Multilingual Proxy {LLM}s for Machine-Generated Text Detection in Academic Essays",
author = "Jiao, Kaijie and
Yao, Xingyu and
Ma, Shixuan and
Fang, Sifan and
Guo, Zikang and
Xu, Benfeng and
Zhang, Licheng and
Wang, Quan and
Zhang, Yongdong and
Mao, Zhendong",
editor = "Alam, Firoj and
Nakov, Preslav and
Habash, Nizar and
Gurevych, Iryna and
Chowdhury, Shammur and
Shelmanov, Artem and
Wang, Yuxia and
Artemova, Ekaterina and
Kutlu, Mucahid and
Mikros, George",
booktitle = "Proceedings of the 1stWorkshop on GenAI Content Detection (GenAIDetect)",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "International Conference on Computational Linguistics",
url = "https://aclanthology.org/2025.genaidetect-1.32/",
pages = "290--298",
abstract = "This paper presents the approach we proposed for GenAI Detection Task 2, which aims to classify a given text as either machine-generated or human-written, with a particular emphasis on academic essays. We participated in subtasks A and B, which focus on detecting English and Arabic essays, respectively. We propose a simple and efficient method for detecting machine-generated essays, where we use the Llama-3.1-8B as a proxy to capture the essence of each token in the text. These essences are processed and classified using a refined feature classification network. Our approach does not require fine-tuning the LLM. Instead, we leverage its extensive multilingual knowledge acquired during pretraining to significantly enhance detection performance. The results validate the effectiveness of our approach and demonstrate that leveraging a proxy model with diverse multilingual knowledge can significantly enhance the detection of machine-generated text across multiple languages, regardless of model size. In Subtask A, we achieved an F1 score of 99.9{\%}, ranking first out of 26 teams. In Subtask B, we achieved an F1 score of 96.5{\%}, placing fourth out of 22 teams, with the same score as the third-place team."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="jiao-etal-2025-cmi">
<titleInfo>
<title>CMI-AIGCX at GenAI Detection Task 2: Leveraging Multilingual Proxy LLMs for Machine-Generated Text Detection in Academic Essays</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kaijie</namePart>
<namePart type="family">Jiao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xingyu</namePart>
<namePart type="family">Yao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shixuan</namePart>
<namePart type="family">Ma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sifan</namePart>
<namePart type="family">Fang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zikang</namePart>
<namePart type="family">Guo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Benfeng</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Licheng</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Quan</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yongdong</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhendong</namePart>
<namePart type="family">Mao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1stWorkshop on GenAI Content Detection (GenAIDetect)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Firoj</namePart>
<namePart type="family">Alam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Preslav</namePart>
<namePart type="family">Nakov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nizar</namePart>
<namePart type="family">Habash</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Iryna</namePart>
<namePart type="family">Gurevych</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shammur</namePart>
<namePart type="family">Chowdhury</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Artem</namePart>
<namePart type="family">Shelmanov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuxia</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Artemova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mucahid</namePart>
<namePart type="family">Kutlu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">George</namePart>
<namePart type="family">Mikros</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>International Conference on Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper presents the approach we proposed for GenAI Detection Task 2, which aims to classify a given text as either machine-generated or human-written, with a particular emphasis on academic essays. We participated in subtasks A and B, which focus on detecting English and Arabic essays, respectively. We propose a simple and efficient method for detecting machine-generated essays, where we use the Llama-3.1-8B as a proxy to capture the essence of each token in the text. These essences are processed and classified using a refined feature classification network. Our approach does not require fine-tuning the LLM. Instead, we leverage its extensive multilingual knowledge acquired during pretraining to significantly enhance detection performance. The results validate the effectiveness of our approach and demonstrate that leveraging a proxy model with diverse multilingual knowledge can significantly enhance the detection of machine-generated text across multiple languages, regardless of model size. In Subtask A, we achieved an F1 score of 99.9%, ranking first out of 26 teams. In Subtask B, we achieved an F1 score of 96.5%, placing fourth out of 22 teams, with the same score as the third-place team.</abstract>
<identifier type="citekey">jiao-etal-2025-cmi</identifier>
<location>
<url>https://aclanthology.org/2025.genaidetect-1.32/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>290</start>
<end>298</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T CMI-AIGCX at GenAI Detection Task 2: Leveraging Multilingual Proxy LLMs for Machine-Generated Text Detection in Academic Essays
%A Jiao, Kaijie
%A Yao, Xingyu
%A Ma, Shixuan
%A Fang, Sifan
%A Guo, Zikang
%A Xu, Benfeng
%A Zhang, Licheng
%A Wang, Quan
%A Zhang, Yongdong
%A Mao, Zhendong
%Y Alam, Firoj
%Y Nakov, Preslav
%Y Habash, Nizar
%Y Gurevych, Iryna
%Y Chowdhury, Shammur
%Y Shelmanov, Artem
%Y Wang, Yuxia
%Y Artemova, Ekaterina
%Y Kutlu, Mucahid
%Y Mikros, George
%S Proceedings of the 1stWorkshop on GenAI Content Detection (GenAIDetect)
%D 2025
%8 January
%I International Conference on Computational Linguistics
%C Abu Dhabi, UAE
%F jiao-etal-2025-cmi
%X This paper presents the approach we proposed for GenAI Detection Task 2, which aims to classify a given text as either machine-generated or human-written, with a particular emphasis on academic essays. We participated in subtasks A and B, which focus on detecting English and Arabic essays, respectively. We propose a simple and efficient method for detecting machine-generated essays, where we use the Llama-3.1-8B as a proxy to capture the essence of each token in the text. These essences are processed and classified using a refined feature classification network. Our approach does not require fine-tuning the LLM. Instead, we leverage its extensive multilingual knowledge acquired during pretraining to significantly enhance detection performance. The results validate the effectiveness of our approach and demonstrate that leveraging a proxy model with diverse multilingual knowledge can significantly enhance the detection of machine-generated text across multiple languages, regardless of model size. In Subtask A, we achieved an F1 score of 99.9%, ranking first out of 26 teams. In Subtask B, we achieved an F1 score of 96.5%, placing fourth out of 22 teams, with the same score as the third-place team.
%U https://aclanthology.org/2025.genaidetect-1.32/
%P 290-298
Markdown (Informal)
[CMI-AIGCX at GenAI Detection Task 2: Leveraging Multilingual Proxy LLMs for Machine-Generated Text Detection in Academic Essays](https://aclanthology.org/2025.genaidetect-1.32/) (Jiao et al., GenAIDetect 2025)
ACL
- Kaijie Jiao, Xingyu Yao, Shixuan Ma, Sifan Fang, Zikang Guo, Benfeng Xu, Licheng Zhang, Quan Wang, Yongdong Zhang, and Zhendong Mao. 2025. CMI-AIGCX at GenAI Detection Task 2: Leveraging Multilingual Proxy LLMs for Machine-Generated Text Detection in Academic Essays. In Proceedings of the 1stWorkshop on GenAI Content Detection (GenAIDetect), pages 290–298, Abu Dhabi, UAE. International Conference on Computational Linguistics.