@inproceedings{tran-etal-2024-l3i,
title = "L3i++ at {S}em{E}val-2024 Task 8: Can Fine-tuned Large Language Model Detect Multigenerator, Multidomain, and Multilingual Black-Box Machine-Generated Text?",
author = "Tran, Hanh Thi Hong and
Nguyen, Tien Nam and
Doucet, Antoine and
Pollak, Senja",
editor = {Ojha, Atul Kr. and
Do{\u{g}}ru{\"o}z, A. Seza and
Tayyar Madabushi, Harish and
Da San Martino, Giovanni and
Rosenthal, Sara and
Ros{\'a}, Aiala},
booktitle = "Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.semeval-1.3",
doi = "10.18653/v1/2024.semeval-1.3",
pages = "13--21",
abstract = "This paper summarizes our participation in SemEval-2024 Task 8: Multigenerator, Multidomain, and Multilingual Black-Box Machine-Generated Text Detection. In this task, we aim to solve two over three Subtasks: (1) Monolingual and Multilingual Binary Human-Written vs. Machine-Generated Text Classification; and (2) Multi-Way Machine-Generated Text Classification. We conducted a comprehensive comparative study across three methodological groups: Five metric-based models (Log-Likelihood, Rank, Log-Rank, Entropy, and MFDMetric), two fine-tuned sequence-labeling language models (RoBERTA and XLM-R); and a fine-tuned large-scale language model (LS-LLaMA). Our findings suggest that our LLM outperformed both traditional sequence-labeling LM benchmarks and metric-based approaches. Furthermore, our fine-tuned classifier excelled in detecting machine-generated multilingual texts and accurately classifying machine-generated texts within a specific category, (e.g., ChatGPT, bloomz, dolly). However, they do exhibit challenges in detecting them in other categories (e.g., cohere, and davinci). This is due to potential overlap in the distribution of the metric among various LLMs. Overall, we achieved a 6th rank in both Multilingual Binary Human-Written vs. Machine-Generated Text Classification and Multi-Way Machine-Generated Text Classification on the leaderboard.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="tran-etal-2024-l3i">
<titleInfo>
<title>L3i++ at SemEval-2024 Task 8: Can Fine-tuned Large Language Model Detect Multigenerator, Multidomain, and Multilingual Black-Box Machine-Generated Text?</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hanh</namePart>
<namePart type="given">Thi</namePart>
<namePart type="given">Hong</namePart>
<namePart type="family">Tran</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tien</namePart>
<namePart type="given">Nam</namePart>
<namePart type="family">Nguyen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Antoine</namePart>
<namePart type="family">Doucet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Senja</namePart>
<namePart type="family">Pollak</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Atul</namePart>
<namePart type="given">Kr.</namePart>
<namePart type="family">Ojha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">A</namePart>
<namePart type="given">Seza</namePart>
<namePart type="family">Doğruöz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Harish</namePart>
<namePart type="family">Tayyar Madabushi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Giovanni</namePart>
<namePart type="family">Da San Martino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Rosenthal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aiala</namePart>
<namePart type="family">Rosá</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mexico City, Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper summarizes our participation in SemEval-2024 Task 8: Multigenerator, Multidomain, and Multilingual Black-Box Machine-Generated Text Detection. In this task, we aim to solve two over three Subtasks: (1) Monolingual and Multilingual Binary Human-Written vs. Machine-Generated Text Classification; and (2) Multi-Way Machine-Generated Text Classification. We conducted a comprehensive comparative study across three methodological groups: Five metric-based models (Log-Likelihood, Rank, Log-Rank, Entropy, and MFDMetric), two fine-tuned sequence-labeling language models (RoBERTA and XLM-R); and a fine-tuned large-scale language model (LS-LLaMA). Our findings suggest that our LLM outperformed both traditional sequence-labeling LM benchmarks and metric-based approaches. Furthermore, our fine-tuned classifier excelled in detecting machine-generated multilingual texts and accurately classifying machine-generated texts within a specific category, (e.g., ChatGPT, bloomz, dolly). However, they do exhibit challenges in detecting them in other categories (e.g., cohere, and davinci). This is due to potential overlap in the distribution of the metric among various LLMs. Overall, we achieved a 6th rank in both Multilingual Binary Human-Written vs. Machine-Generated Text Classification and Multi-Way Machine-Generated Text Classification on the leaderboard.</abstract>
<identifier type="citekey">tran-etal-2024-l3i</identifier>
<identifier type="doi">10.18653/v1/2024.semeval-1.3</identifier>
<location>
<url>https://aclanthology.org/2024.semeval-1.3</url>
</location>
<part>
<date>2024-06</date>
<extent unit="page">
<start>13</start>
<end>21</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T L3i++ at SemEval-2024 Task 8: Can Fine-tuned Large Language Model Detect Multigenerator, Multidomain, and Multilingual Black-Box Machine-Generated Text?
%A Tran, Hanh Thi Hong
%A Nguyen, Tien Nam
%A Doucet, Antoine
%A Pollak, Senja
%Y Ojha, Atul Kr.
%Y Doğruöz, A. Seza
%Y Tayyar Madabushi, Harish
%Y Da San Martino, Giovanni
%Y Rosenthal, Sara
%Y Rosá, Aiala
%S Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F tran-etal-2024-l3i
%X This paper summarizes our participation in SemEval-2024 Task 8: Multigenerator, Multidomain, and Multilingual Black-Box Machine-Generated Text Detection. In this task, we aim to solve two over three Subtasks: (1) Monolingual and Multilingual Binary Human-Written vs. Machine-Generated Text Classification; and (2) Multi-Way Machine-Generated Text Classification. We conducted a comprehensive comparative study across three methodological groups: Five metric-based models (Log-Likelihood, Rank, Log-Rank, Entropy, and MFDMetric), two fine-tuned sequence-labeling language models (RoBERTA and XLM-R); and a fine-tuned large-scale language model (LS-LLaMA). Our findings suggest that our LLM outperformed both traditional sequence-labeling LM benchmarks and metric-based approaches. Furthermore, our fine-tuned classifier excelled in detecting machine-generated multilingual texts and accurately classifying machine-generated texts within a specific category, (e.g., ChatGPT, bloomz, dolly). However, they do exhibit challenges in detecting them in other categories (e.g., cohere, and davinci). This is due to potential overlap in the distribution of the metric among various LLMs. Overall, we achieved a 6th rank in both Multilingual Binary Human-Written vs. Machine-Generated Text Classification and Multi-Way Machine-Generated Text Classification on the leaderboard.
%R 10.18653/v1/2024.semeval-1.3
%U https://aclanthology.org/2024.semeval-1.3
%U https://doi.org/10.18653/v1/2024.semeval-1.3
%P 13-21
Markdown (Informal)
[L3i++ at SemEval-2024 Task 8: Can Fine-tuned Large Language Model Detect Multigenerator, Multidomain, and Multilingual Black-Box Machine-Generated Text?](https://aclanthology.org/2024.semeval-1.3) (Tran et al., SemEval 2024)
ACL