@inproceedings{rodero-pena-etal-2024-i2c,
title = "{I}2{C}-{H}uelva at {S}em{E}val-2024 Task 8: Boosting {AI}-Generated Text Detection with Multimodal Models and Optimized Ensembles",
author = "Rodero Pe{\~n}a, Alberto and
Mata Vazquez, Jacinto and
Pach{\'o}n {\'A}lvarez, Victoria",
editor = {Ojha, Atul Kr. and
Do{\u{g}}ru{\"o}z, A. Seza and
Tayyar Madabushi, Harish and
Da San Martino, Giovanni and
Rosenthal, Sara and
Ros{\'a}, Aiala},
booktitle = "Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.semeval-1.121",
doi = "10.18653/v1/2024.semeval-1.121",
pages = "845--852",
abstract = "With the rise of AI-based text generators, the need for effective detection mechanisms has become paramount. This paper presents new techniques for building adaptable models and optimizing training aspects for identifying synthetically produced texts across multiple generators and domains. The study, divided into binary and multilabel classification tasks, avoids overfitting through strategic training data limitation. A key innovation is the incorporation of multimodal models that blend numerical text features with conventional NLP approaches. The work also delves into optimizing ensemble model combinations via various voting methods, focusing on accuracy as the official metric. The optimized ensemble strategy demonstrates significant efficacy in both subtasks, highlighting the potential of multimodal and ensemble methods in enhancing the robustness of detection systems against emerging text generators.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="rodero-pena-etal-2024-i2c">
<titleInfo>
<title>I2C-Huelva at SemEval-2024 Task 8: Boosting AI-Generated Text Detection with Multimodal Models and Optimized Ensembles</title>
</titleInfo>
<name type="personal">
<namePart type="given">Alberto</namePart>
<namePart type="family">Rodero Peña</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jacinto</namePart>
<namePart type="family">Mata Vazquez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Victoria</namePart>
<namePart type="family">Pachón Álvarez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Atul</namePart>
<namePart type="given">Kr.</namePart>
<namePart type="family">Ojha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">A</namePart>
<namePart type="given">Seza</namePart>
<namePart type="family">Doğruöz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Harish</namePart>
<namePart type="family">Tayyar Madabushi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Giovanni</namePart>
<namePart type="family">Da San Martino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Rosenthal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aiala</namePart>
<namePart type="family">Rosá</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mexico City, Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>With the rise of AI-based text generators, the need for effective detection mechanisms has become paramount. This paper presents new techniques for building adaptable models and optimizing training aspects for identifying synthetically produced texts across multiple generators and domains. The study, divided into binary and multilabel classification tasks, avoids overfitting through strategic training data limitation. A key innovation is the incorporation of multimodal models that blend numerical text features with conventional NLP approaches. The work also delves into optimizing ensemble model combinations via various voting methods, focusing on accuracy as the official metric. The optimized ensemble strategy demonstrates significant efficacy in both subtasks, highlighting the potential of multimodal and ensemble methods in enhancing the robustness of detection systems against emerging text generators.</abstract>
<identifier type="citekey">rodero-pena-etal-2024-i2c</identifier>
<identifier type="doi">10.18653/v1/2024.semeval-1.121</identifier>
<location>
<url>https://aclanthology.org/2024.semeval-1.121</url>
</location>
<part>
<date>2024-06</date>
<extent unit="page">
<start>845</start>
<end>852</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T I2C-Huelva at SemEval-2024 Task 8: Boosting AI-Generated Text Detection with Multimodal Models and Optimized Ensembles
%A Rodero Peña, Alberto
%A Mata Vazquez, Jacinto
%A Pachón Álvarez, Victoria
%Y Ojha, Atul Kr.
%Y Doğruöz, A. Seza
%Y Tayyar Madabushi, Harish
%Y Da San Martino, Giovanni
%Y Rosenthal, Sara
%Y Rosá, Aiala
%S Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F rodero-pena-etal-2024-i2c
%X With the rise of AI-based text generators, the need for effective detection mechanisms has become paramount. This paper presents new techniques for building adaptable models and optimizing training aspects for identifying synthetically produced texts across multiple generators and domains. The study, divided into binary and multilabel classification tasks, avoids overfitting through strategic training data limitation. A key innovation is the incorporation of multimodal models that blend numerical text features with conventional NLP approaches. The work also delves into optimizing ensemble model combinations via various voting methods, focusing on accuracy as the official metric. The optimized ensemble strategy demonstrates significant efficacy in both subtasks, highlighting the potential of multimodal and ensemble methods in enhancing the robustness of detection systems against emerging text generators.
%R 10.18653/v1/2024.semeval-1.121
%U https://aclanthology.org/2024.semeval-1.121
%U https://doi.org/10.18653/v1/2024.semeval-1.121
%P 845-852
Markdown (Informal)
[I2C-Huelva at SemEval-2024 Task 8: Boosting AI-Generated Text Detection with Multimodal Models and Optimized Ensembles](https://aclanthology.org/2024.semeval-1.121) (Rodero Peña et al., SemEval 2024)
ACL