@inproceedings{sarvazyan-etal-2024-genaios,
title = "Genaios at {S}em{E}val-2024 Task 8: Detecting Machine-Generated Text by Mixing Language Model Probabilistic Features",
author = "Sarvazyan, Areg Mikael and
Gonz{\'a}lez, Jos{\'e} {\'A}ngel and
Franco-salvador, Marc",
editor = {Ojha, Atul Kr. and
Do{\u{g}}ru{\"o}z, A. Seza and
Tayyar Madabushi, Harish and
Da San Martino, Giovanni and
Rosenthal, Sara and
Ros{\'a}, Aiala},
booktitle = "Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.semeval-1.17/",
doi = "10.18653/v1/2024.semeval-1.17",
pages = "101--107",
abstract = "This paper describes the participation of the Genaios team in the monolingual track of Subtask A at SemEval-2024 Task 8. Our best system, LLMixtic, is a Transformer Encoder that mixes token-level probabilistic features extracted from four LLaMA-2 models. We obtained the best results in the official ranking (96.88{\%} accuracy), showing a false positive ratio of 4.38{\%} and a false negative ratio of 1.97{\%} on the test set. We further study LLMixtic through ablation, probabilistic, and attention analyses, finding that (i) performance improves as more LLMs and probabilistic features are included, (ii) LLMixtic puts most attention on the features of the last tokens, (iii) it fails on samples where human text probabilities become consistently higher than for generated text, and (iv) LLMixtic`s false negatives exhibit a bias towards text with newlines."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sarvazyan-etal-2024-genaios">
<titleInfo>
<title>Genaios at SemEval-2024 Task 8: Detecting Machine-Generated Text by Mixing Language Model Probabilistic Features</title>
</titleInfo>
<name type="personal">
<namePart type="given">Areg</namePart>
<namePart type="given">Mikael</namePart>
<namePart type="family">Sarvazyan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">José</namePart>
<namePart type="given">Ángel</namePart>
<namePart type="family">González</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marc</namePart>
<namePart type="family">Franco-salvador</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Atul</namePart>
<namePart type="given">Kr.</namePart>
<namePart type="family">Ojha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">A</namePart>
<namePart type="given">Seza</namePart>
<namePart type="family">Doğruöz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Harish</namePart>
<namePart type="family">Tayyar Madabushi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Giovanni</namePart>
<namePart type="family">Da San Martino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Rosenthal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aiala</namePart>
<namePart type="family">Rosá</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mexico City, Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper describes the participation of the Genaios team in the monolingual track of Subtask A at SemEval-2024 Task 8. Our best system, LLMixtic, is a Transformer Encoder that mixes token-level probabilistic features extracted from four LLaMA-2 models. We obtained the best results in the official ranking (96.88% accuracy), showing a false positive ratio of 4.38% and a false negative ratio of 1.97% on the test set. We further study LLMixtic through ablation, probabilistic, and attention analyses, finding that (i) performance improves as more LLMs and probabilistic features are included, (ii) LLMixtic puts most attention on the features of the last tokens, (iii) it fails on samples where human text probabilities become consistently higher than for generated text, and (iv) LLMixtic‘s false negatives exhibit a bias towards text with newlines.</abstract>
<identifier type="citekey">sarvazyan-etal-2024-genaios</identifier>
<identifier type="doi">10.18653/v1/2024.semeval-1.17</identifier>
<location>
<url>https://aclanthology.org/2024.semeval-1.17/</url>
</location>
<part>
<date>2024-06</date>
<extent unit="page">
<start>101</start>
<end>107</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Genaios at SemEval-2024 Task 8: Detecting Machine-Generated Text by Mixing Language Model Probabilistic Features
%A Sarvazyan, Areg Mikael
%A González, José Ángel
%A Franco-salvador, Marc
%Y Ojha, Atul Kr.
%Y Doğruöz, A. Seza
%Y Tayyar Madabushi, Harish
%Y Da San Martino, Giovanni
%Y Rosenthal, Sara
%Y Rosá, Aiala
%S Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F sarvazyan-etal-2024-genaios
%X This paper describes the participation of the Genaios team in the monolingual track of Subtask A at SemEval-2024 Task 8. Our best system, LLMixtic, is a Transformer Encoder that mixes token-level probabilistic features extracted from four LLaMA-2 models. We obtained the best results in the official ranking (96.88% accuracy), showing a false positive ratio of 4.38% and a false negative ratio of 1.97% on the test set. We further study LLMixtic through ablation, probabilistic, and attention analyses, finding that (i) performance improves as more LLMs and probabilistic features are included, (ii) LLMixtic puts most attention on the features of the last tokens, (iii) it fails on samples where human text probabilities become consistently higher than for generated text, and (iv) LLMixtic‘s false negatives exhibit a bias towards text with newlines.
%R 10.18653/v1/2024.semeval-1.17
%U https://aclanthology.org/2024.semeval-1.17/
%U https://doi.org/10.18653/v1/2024.semeval-1.17
%P 101-107
Markdown (Informal)
[Genaios at SemEval-2024 Task 8: Detecting Machine-Generated Text by Mixing Language Model Probabilistic Features](https://aclanthology.org/2024.semeval-1.17/) (Sarvazyan et al., SemEval 2024)
ACL