% BibTeX record for https://aclanthology.org/2024.nlpaics-1.10/ (same work as the MODS and EndNote records that follow).
@inproceedings{gutierrez-megias-etal-2024-influence,
    title     = {The Influence of the Perplexity Score in the Detection of Machine-generated Texts},
    author    = {Guti{\'e}rrez Meg{\'i}as, Alberto Jos{\'e} and
                 Ure{\~n}a-L{\'o}pez, L. Alfonso and
                 Mart{\'i}nez C{\'a}mara, Eugenio},
    editor    = {Mitkov, Ruslan and
                 Ezzini, Saad and
                 Ranasinghe, Tharindu and
                 Ezeani, Ignatius and
                 Khallaf, Nouran and
                 Acarturk, Cengiz and
                 Bradbury, Matthew and
                 El-Haj, Mo and
                 Rayson, Paul},
    booktitle = {Proceedings of the First International Conference on Natural Language Processing and Artificial Intelligence for Cyber Security},
    month     = jul,
    year      = {2024},
    address   = {Lancaster, UK},
    publisher = {International Conference on Natural Language Processing and Artificial Intelligence for Cyber Security},
    url       = {https://aclanthology.org/2024.nlpaics-1.10/},
    pages     = {80--85},
    abstract  = {The high performance of large language models (LLM) generating natural language represents a real threat, since they can be leveraged to generate any kind of deceptive content. Since there are still disparities among the language generated by machines and the human language, we claim that perplexity may be used as classification signal to discern between machine and human text. We propose a classification model based on XLM-RoBERTa, and we evaluate it on the M4 dataset. The results show that the perplexity score is useful for the identification of machine generated text, but it is constrained by the differences among the LLMs used in the training and test sets.}
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS v3 record for one paper. The nested relatedItem type="host"
       describes the proceedings volume (title, editors, publisher, place). -->
  <mods ID="gutierrez-megias-etal-2024-influence">
    <titleInfo>
      <title>The Influence of the Perplexity Score in the Detection of Machine-generated Texts</title>
    </titleInfo>
    <!-- Authors, in citation order. -->
    <name type="personal">
      <namePart type="given">Alberto</namePart>
      <namePart type="given">José</namePart>
      <namePart type="family">Gutiérrez Megías</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <!-- Initial keeps its period, matching "L. Alfonso" in the other export formats. -->
      <namePart type="given">L.</namePart>
      <namePart type="given">Alfonso</namePart>
      <namePart type="family">Ureña-López</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Eugenio</namePart>
      <namePart type="family">Martínez Cámara</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume and its editors. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the First International Conference on Natural Language Processing and Artificial Intelligence for Cyber Security</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Ruslan</namePart>
        <namePart type="family">Mitkov</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Saad</namePart>
        <namePart type="family">Ezzini</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Tharindu</namePart>
        <namePart type="family">Ranasinghe</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ignatius</namePart>
        <namePart type="family">Ezeani</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Nouran</namePart>
        <namePart type="family">Khallaf</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Cengiz</namePart>
        <namePart type="family">Acarturk</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Matthew</namePart>
        <namePart type="family">Bradbury</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mo</namePart>
        <namePart type="family">El-Haj</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Paul</namePart>
        <namePart type="family">Rayson</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>International Conference on Natural Language Processing and Artificial Intelligence for Cyber Security</publisher>
        <place>
          <placeTerm type="text">Lancaster, UK</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>The high performance of large language models (LLM) generating natural language represents a real threat, since they can be leveraged to generate any kind of deceptive content. Since there are still disparities among the language generated by machines and the human language, we claim that perplexity may be used as classification signal to discern between machine and human text. We propose a classification model based on XLM-RoBERTa, and we evaluate it on the M4 dataset. The results show that the perplexity score is useful for the identification of machine generated text, but it is constrained by the differences among the LLMs used in the training and test sets.</abstract>
    <identifier type="citekey">gutierrez-megias-etal-2024-influence</identifier>
    <location>
      <url>https://aclanthology.org/2024.nlpaics-1.10/</url>
    </location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2024-07</date>
      <extent unit="page">
        <start>80</start>
        <end>85</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T The Influence of the Perplexity Score in the Detection of Machine-generated Texts
%A Gutiérrez Megías, Alberto José
%A Ureña-López, L. Alfonso
%A Martínez Cámara, Eugenio
%Y Mitkov, Ruslan
%Y Ezzini, Saad
%Y Ranasinghe, Tharindu
%Y Ezeani, Ignatius
%Y Khallaf, Nouran
%Y Acarturk, Cengiz
%Y Bradbury, Matthew
%Y El-Haj, Mo
%Y Rayson, Paul
%S Proceedings of the First International Conference on Natural Language Processing and Artificial Intelligence for Cyber Security
%D 2024
%8 July
%I International Conference on Natural Language Processing and Artificial Intelligence for Cyber Security
%C Lancaster, UK
%F gutierrez-megias-etal-2024-influence
%X The high performance of large language models (LLM) generating natural language represents a real threat, since they can be leveraged to generate any kind of deceptive content. Since there are still disparities among the language generated by machines and the human language, we claim that perplexity may be used as classification signal to discern between machine and human text. We propose a classification model based on XLM-RoBERTa, and we evaluate it on the M4 dataset. The results show that the perplexity score is useful for the identification of machine generated text, but it is constrained by the differences among the LLMs used in the training and test sets.
%U https://aclanthology.org/2024.nlpaics-1.10/
%P 80-85
Markdown (Informal)
[The Influence of the Perplexity Score in the Detection of Machine-generated Texts](https://aclanthology.org/2024.nlpaics-1.10/) (Gutiérrez Megías et al., NLPAICS 2024)
ACL
- Alberto José Gutiérrez Megías, L. Alfonso Ureña-López, and Eugenio Martínez Cámara. 2024. The Influence of the Perplexity Score in the Detection of Machine-generated Texts. In Proceedings of the First International Conference on Natural Language Processing and Artificial Intelligence for Cyber Security, pages 80–85, Lancaster, UK. International Conference on Natural Language Processing and Artificial Intelligence for Cyber Security.