% ACL Anthology record: EEUCA 2026 workshop paper (proceedings pages 96--103).
@inproceedings{radulescu-2026-fnlp412,
    title = {{FNLP}412@{EEUCA} 2026: Understanding Toxic Behavioral Intent in Gaming Chat Logs using Transfer Learning and Synthetic Data Augmentation},
    author = {Radulescu, Mihai Radu},
    editor = {H{\"u}rriyeto{\u{g}}lu, Ali and
      Thapa, Surendrabikram and
      Tanev, Hristo},
    booktitle = {Proceedings of the 9th Workshop on Event Extraction and Understanding: Challenges and Applications ({EEUCA} 2026)},
    month = jul,
    year = {2026},
    address = {San Diego, California, USA},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2026.eeuca-1.10/},
    pages = {96--103},
    isbn = {979-8-89176-402-6},
    abstract = {Our paper explores several machine learning methods for detecting toxic language in gaming-related chat utterances. We start with the GameTox dataset, perform some data preprocessing and augment the minority classes with LLM-generated synthetic data. We then set a baseline using a classic Logistic Regression model and continue to explore several approaches to surpassing it, by leveraging the leading multilingual transformer models (XLM-RoBERTa and DeBERTa-V3) to classify our test data. We achieve a top result of 0.6725 Macro-F1 (2nd place on shared task leaderboard) using a MDeBERTa-V3 model which we pretrained on the Jigsaw dataset for 1 epoch and then fine-tuned on our GameTox data for 5 epochs.}
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="radulescu-2026-fnlp412">
<titleInfo>
<title>FNLP412@EEUCA 2026: Understanding Toxic Behavioral Intent in Gaming Chat Logs using Transfer Learning and Synthetic Data Augmentation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mihai</namePart>
<namePart type="given">Radu</namePart>
<namePart type="family">Radulescu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 9th Workshop on Event Extraction and Understanding: Challenges and Applications (EEUCA 2026)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ali</namePart>
<namePart type="family">Hürriyetoğlu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Surendrabikram</namePart>
<namePart type="family">Thapa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hristo</namePart>
<namePart type="family">Tanev</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-402-6</identifier>
</relatedItem>
<abstract>Our paper explores several machine learning methods for detecting toxic language in gaming-related chat utterances. We start with the GameTox dataset, perform some data preprocessing and augment the minority classes with LLM-generated synthetic data. We then set a baseline using a classic Logistic Regression model and continue to explore several approaches to surpassing it, by leveraging the leading multilingual transformer models (XLM-RoBERTa and DeBERTa-V3) to classify our test data. We achieve a top result of 0.6725 Macro-F1 (2nd place on shared task leaderboard) using a MDeBERTa-V3 model which we pretrained on the Jigsaw dataset for 1 epoch and then fine-tuned on our GameTox data for 5 epochs.</abstract>
<identifier type="citekey">radulescu-2026-fnlp412</identifier>
<location>
<url>https://aclanthology.org/2026.eeuca-1.10/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>96</start>
<end>103</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T FNLP412@EEUCA 2026: Understanding Toxic Behavioral Intent in Gaming Chat Logs using Transfer Learning and Synthetic Data Augmentation
%A Radulescu, Mihai Radu
%Y Hürriyetoğlu, Ali
%Y Thapa, Surendrabikram
%Y Tanev, Hristo
%S Proceedings of the 9th Workshop on Event Extraction and Understanding: Challenges and Applications (EEUCA 2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, USA
%@ 979-8-89176-402-6
%F radulescu-2026-fnlp412
%X Our paper explores several machine learning methods for detecting toxic language in gaming-related chat utterances. We start with the GameTox dataset, perform some data preprocessing and augment the minority classes with LLM-generated synthetic data. We then set a baseline using a classic Logistic Regression model and continue to explore several approaches to surpassing it, by leveraging the leading multilingual transformer models (XLM-RoBERTa and DeBERTa-V3) to classify our test data. We achieve a top result of 0.6725 Macro-F1 (2nd place on shared task leaderboard) using a MDeBERTa-V3 model which we pretrained on the Jigsaw dataset for 1 epoch and then fine-tuned on our GameTox data for 5 epochs.
%U https://aclanthology.org/2026.eeuca-1.10/
%P 96-103
Markdown (Informal)
[FNLP412@EEUCA 2026: Understanding Toxic Behavioral Intent in Gaming Chat Logs using Transfer Learning and Synthetic Data Augmentation](https://aclanthology.org/2026.eeuca-1.10/) (Radulescu, EEUCA 2026)
ACL