% Conference paper from the SemEval-2024 workshop proceedings
% (ACL Anthology ID 2024.semeval-1.215). Text outside an entry is ignored by BibTeX.
@inproceedings{rezaei-etal-2024-clulab,
    title     = {{CLULab-UofA} at {SemEval-2024} Task 8: Detecting Machine-Generated Text Using Triplet-Loss-Trained Text Similarity and Text Classification},
    author    = {Rezaei, Mohammadhossein and
                 Kwon, Yeaeun and
                 Sanayei, Reza and
                 Singh, Abhyuday and
                 Bethard, Steven},
    editor    = {Ojha, Atul Kr. and
                 Do{\u{g}}ru{\"o}z, A. Seza and
                 Tayyar Madabushi, Harish and
                 Da San Martino, Giovanni and
                 Rosenthal, Sara and
                 Ros{\'a}, Aiala},
    booktitle = {Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)},
    month     = jun,
    year      = {2024},
    address   = {Mexico City, Mexico},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2024.semeval-1.215},
    doi       = {10.18653/v1/2024.semeval-1.215},
    pages     = {1498--1504},
    abstract  = {Detecting machine-generated text is a critical task in the era of large language models. In this paper, we present our systems for SemEval-2024 Task 8, which focuses on multi-class classification to discern between human-written and machine-generated texts by five state-of-the-art large language models. We propose three different systems: unsupervised text similarity, triplet-loss-trained text similarity, and text classification. We show that the triplet-loss-trained text similarity system outperforms the other systems, achieving 80{\%} accuracy on the test set and surpassing the baseline model for this subtask. Additionally, our text classification system, which takes into account sentence paraphrases generated by the candidate models, also outperforms the unsupervised text similarity system, achieving 74{\%} accuracy.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="rezaei-etal-2024-clulab">
<titleInfo>
<title>CLULab-UofA at SemEval-2024 Task 8: Detecting Machine-Generated Text Using Triplet-Loss-Trained Text Similarity and Text Classification</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mohammadhossein</namePart>
<namePart type="family">Rezaei</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yeaeun</namePart>
<namePart type="family">Kwon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Reza</namePart>
<namePart type="family">Sanayei</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abhyuday</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Bethard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Atul</namePart>
<namePart type="given">Kr.</namePart>
<namePart type="family">Ojha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">A.</namePart>
<namePart type="given">Seza</namePart>
<namePart type="family">Doğruöz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Harish</namePart>
<namePart type="family">Tayyar Madabushi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Giovanni</namePart>
<namePart type="family">Da San Martino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Rosenthal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aiala</namePart>
<namePart type="family">Rosá</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mexico City, Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Detecting machine-generated text is a critical task in the era of large language models. In this paper, we present our systems for SemEval-2024 Task 8, which focuses on multi-class classification to discern between human-written and machine-generated texts by five state-of-the-art large language models. We propose three different systems: unsupervised text similarity, triplet-loss-trained text similarity, and text classification. We show that the triplet-loss-trained text similarity system outperforms the other systems, achieving 80% accuracy on the test set and surpassing the baseline model for this subtask. Additionally, our text classification system, which takes into account sentence paraphrases generated by the candidate models, also outperforms the unsupervised text similarity system, achieving 74% accuracy.</abstract>
<identifier type="citekey">rezaei-etal-2024-clulab</identifier>
<identifier type="doi">10.18653/v1/2024.semeval-1.215</identifier>
<location>
<url>https://aclanthology.org/2024.semeval-1.215</url>
</location>
<part>
<date>2024-06</date>
<extent unit="page">
<start>1498</start>
<end>1504</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T CLULab-UofA at SemEval-2024 Task 8: Detecting Machine-Generated Text Using Triplet-Loss-Trained Text Similarity and Text Classification
%A Rezaei, Mohammadhossein
%A Kwon, Yeaeun
%A Sanayei, Reza
%A Singh, Abhyuday
%A Bethard, Steven
%Y Ojha, Atul Kr.
%Y Doğruöz, A. Seza
%Y Tayyar Madabushi, Harish
%Y Da San Martino, Giovanni
%Y Rosenthal, Sara
%Y Rosá, Aiala
%S Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F rezaei-etal-2024-clulab
%X Detecting machine-generated text is a critical task in the era of large language models. In this paper, we present our systems for SemEval-2024 Task 8, which focuses on multi-class classification to discern between human-written and machine-generated texts by five state-of-the-art large language models. We propose three different systems: unsupervised text similarity, triplet-loss-trained text similarity, and text classification. We show that the triplet-loss-trained text similarity system outperforms the other systems, achieving 80% accuracy on the test set and surpassing the baseline model for this subtask. Additionally, our text classification system, which takes into account sentence paraphrases generated by the candidate models, also outperforms the unsupervised text similarity system, achieving 74% accuracy.
%R 10.18653/v1/2024.semeval-1.215
%U https://aclanthology.org/2024.semeval-1.215
%U https://doi.org/10.18653/v1/2024.semeval-1.215
%P 1498-1504
Markdown (Informal)
[CLULab-UofA at SemEval-2024 Task 8: Detecting Machine-Generated Text Using Triplet-Loss-Trained Text Similarity and Text Classification](https://aclanthology.org/2024.semeval-1.215) (Rezaei et al., SemEval 2024)
ACL