@inproceedings{vanetik-etal-2025-towards,
title = "Towards Safer {H}ebrew Communication: A Dataset for Offensive Language Detoxification",
author = "Vanetik, Natalia and
Liberov, Lior and
Litvak, Marina and
Liebeskind, Chaya",
editor = "Angelova, Galia and
Kunilovskaya, Maria and
Escribe, Marie and
Mitkov, Ruslan",
booktitle = "Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era",
month = sep,
year = "2025",
address = "Varna, Bulgaria",
publisher = "INCOMA Ltd., Shoumen, Bulgaria",
url = "https://aclanthology.org/2025.ranlp-1.149/",
pages = "1289--1298",
abstract = "Text detoxification is the task of transforming offensive or toxic content into a non-offensive form while preserving the original meaning. Despite increasing research interest in detoxification across various languages, no resources or benchmarks exist for Hebrew, a Semitic language with unique morphological, syntactic, and cultural characteristics. This paper introduces HeDetox, the first annotated dataset for text detoxification in Hebrew. HeDetox contains 600 sentence pairs, each consisting of an offensive source text and a non-offensive text rewritten with LLM and human intervention. We present a detailed dataset analysis and evaluation showing that the dataset benefits offensive language detection. HeDetox offers a foundational resource for Hebrew natural language processing, advancing research in offensive language mitigation and controllable text generation."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="vanetik-etal-2025-towards">
<titleInfo>
<title>Towards Safer Hebrew Communication: A Dataset for Offensive Language Detoxification</title>
</titleInfo>
<name type="personal">
<namePart type="given">Natalia</namePart>
<namePart type="family">Vanetik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lior</namePart>
<namePart type="family">Liberov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marina</namePart>
<namePart type="family">Litvak</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chaya</namePart>
<namePart type="family">Liebeskind</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era</title>
</titleInfo>
<name type="personal">
<namePart type="given">Galia</namePart>
<namePart type="family">Angelova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Kunilovskaya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marie</namePart>
<namePart type="family">Escribe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd., Shoumen, Bulgaria</publisher>
<place>
<placeTerm type="text">Varna, Bulgaria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Text detoxification is the task of transforming offensive or toxic content into a non-offensive form while preserving the original meaning. Despite increasing research interest in detoxification across various languages, no resources or benchmarks exist for Hebrew, a Semitic language with unique morphological, syntactic, and cultural characteristics. This paper introduces HeDetox, the first annotated dataset for text detoxification in Hebrew. HeDetox contains 600 sentence pairs, each consisting of an offensive source text and a non-offensive text rewritten with LLM and human intervention. We present a detailed dataset analysis and evaluation showing that the dataset benefits offensive language detection. HeDetox offers a foundational resource for Hebrew natural language processing, advancing research in offensive language mitigation and controllable text generation.</abstract>
<identifier type="citekey">vanetik-etal-2025-towards</identifier>
<location>
<url>https://aclanthology.org/2025.ranlp-1.149/</url>
</location>
<part>
<date>2025-09</date>
<extent unit="page">
<start>1289</start>
<end>1298</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Towards Safer Hebrew Communication: A Dataset for Offensive Language Detoxification
%A Vanetik, Natalia
%A Liberov, Lior
%A Litvak, Marina
%A Liebeskind, Chaya
%Y Angelova, Galia
%Y Kunilovskaya, Maria
%Y Escribe, Marie
%Y Mitkov, Ruslan
%S Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era
%D 2025
%8 September
%I INCOMA Ltd., Shoumen, Bulgaria
%C Varna, Bulgaria
%F vanetik-etal-2025-towards
%X Text detoxification is the task of transforming offensive or toxic content into a non-offensive form while preserving the original meaning. Despite increasing research interest in detoxification across various languages, no resources or benchmarks exist for Hebrew, a Semitic language with unique morphological, syntactic, and cultural characteristics. This paper introduces HeDetox, the first annotated dataset for text detoxification in Hebrew. HeDetox contains 600 sentence pairs, each consisting of an offensive source text and a non-offensive text rewritten with LLM and human intervention. We present a detailed dataset analysis and evaluation showing that the dataset benefits offensive language detection. HeDetox offers a foundational resource for Hebrew natural language processing, advancing research in offensive language mitigation and controllable text generation.
%U https://aclanthology.org/2025.ranlp-1.149/
%P 1289-1298
Markdown (Informal)
[Towards Safer Hebrew Communication: A Dataset for Offensive Language Detoxification](https://aclanthology.org/2025.ranlp-1.149/) (Vanetik et al., RANLP 2025)
ACL