@inproceedings{de-ruvo-etal-2025-detoxify,
title = "Detoxify-{IT}: An {I}talian Parallel Dataset for Text Detoxification",
author = "De Ruvo, Viola and
Muti, Arianna and
Dementieva, Daryna and
Nozza, Debora",
editor = "Calabrese, Agostina and
de Kock, Christine and
Nozza, Debora and
Plaza-del-Arco, Flor Miriam and
Talat, Zeerak and
Vargas, Francielle",
booktitle = "Proceedings of the The 9th Workshop on Online Abuse and Harms (WOAH)",
month = aug,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.woah-1.24/",
pages = "267--275",
ISBN = "979-8-89176-105-6",
abstract = "Toxic language online poses growing challenges for content moderation. Detoxification, which rewrites toxic content into neutral form, offers a promising alternative but remains underexplored beyond English. We present Detoxify-IT, the first Italian dataset for this task, featuring toxic comments and their human-written neutral rewrites. Our experiments show that even limited fine-tuning on Italian data leads to notable improvements in content preservation and fluency compared to both multilingual models and LLMs used in zero-shot settings, underlining the need for language-specific resources. This work enables detoxification research in Italian and supports broader efforts toward safer, more inclusive online communication."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="de-ruvo-etal-2025-detoxify">
<titleInfo>
<title>Detoxify-IT: An Italian Parallel Dataset for Text Detoxification</title>
</titleInfo>
<name type="personal">
<namePart type="given">Viola</namePart>
<namePart type="family">De Ruvo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arianna</namePart>
<namePart type="family">Muti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daryna</namePart>
<namePart type="family">Dementieva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Debora</namePart>
<namePart type="family">Nozza</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the The 9th Workshop on Online Abuse and Harms (WOAH)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Agostina</namePart>
<namePart type="family">Calabrese</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christine</namePart>
<namePart type="family">de Kock</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Debora</namePart>
<namePart type="family">Nozza</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Flor</namePart>
<namePart type="given">Miriam</namePart>
<namePart type="family">Plaza-del-Arco</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zeerak</namePart>
<namePart type="family">Talat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Francielle</namePart>
<namePart type="family">Vargas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-105-6</identifier>
</relatedItem>
<abstract>Toxic language online poses growing challenges for content moderation. Detoxification, which rewrites toxic content into neutral form, offers a promising alternative but remains underexplored beyond English. We present Detoxify-IT, the first Italian dataset for this task, featuring toxic comments and their human-written neutral rewrites. Our experiments show that even limited fine-tuning on Italian data leads to notable improvements in content preservation and fluency compared to both multilingual models and LLMs used in zero-shot settings, underlining the need for language-specific resources. This work enables detoxification research in Italian and supports broader efforts toward safer, more inclusive online communication.</abstract>
<identifier type="citekey">de-ruvo-etal-2025-detoxify</identifier>
<location>
<url>https://aclanthology.org/2025.woah-1.24/</url>
</location>
<part>
<date>2025-08</date>
<extent unit="page">
<start>267</start>
<end>275</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Detoxify-IT: An Italian Parallel Dataset for Text Detoxification
%A De Ruvo, Viola
%A Muti, Arianna
%A Dementieva, Daryna
%A Nozza, Debora
%Y Calabrese, Agostina
%Y de Kock, Christine
%Y Nozza, Debora
%Y Plaza-del-Arco, Flor Miriam
%Y Talat, Zeerak
%Y Vargas, Francielle
%S Proceedings of the The 9th Workshop on Online Abuse and Harms (WOAH)
%D 2025
%8 August
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-105-6
%F de-ruvo-etal-2025-detoxify
%X Toxic language online poses growing challenges for content moderation. Detoxification, which rewrites toxic content into neutral form, offers a promising alternative but remains underexplored beyond English. We present Detoxify-IT, the first Italian dataset for this task, featuring toxic comments and their human-written neutral rewrites. Our experiments show that even limited fine-tuning on Italian data leads to notable improvements in content preservation and fluency compared to both multilingual models and LLMs used in zero-shot settings, underlining the need for language-specific resources. This work enables detoxification research in Italian and supports broader efforts toward safer, more inclusive online communication.
%U https://aclanthology.org/2025.woah-1.24/
%P 267-275
Markdown (Informal)
[Detoxify-IT: An Italian Parallel Dataset for Text Detoxification](https://aclanthology.org/2025.woah-1.24/) (De Ruvo et al., WOAH 2025)
ACL