@inproceedings{sourabrata-etal-2023-text,
title = "Text Detoxification as Style Transfer in {E}nglish and {H}indi",
author = "Mukherjee, Sourabrata and
Bansal, Akanksha and
Kr. Ojha, Atul and
P. McCrae, John and
Dusek, Ondrej",
editor = "D. Pawar, Jyoti and
Lalitha Devi, Sobha",
booktitle = "Proceedings of the 20th International Conference on Natural Language Processing (ICON)",
month = dec,
year = "2023",
address = "Goa University, Goa, India",
publisher = "NLP Association of India (NLPAI)",
url = "https://aclanthology.org/2023.icon-1.13",
pages = "133--144",
abstract = "This paper focuses on text detoxification, i.e., automatically converting toxic text into nontoxic text. This task contributes to safer and more respectful online communication and can be considered a Text Style Transfer (TST) task, where the text{'}s style changes while its content is preserved. We present three approaches: (i) knowledge transfer from a similar task (ii) multi-task learning approach, combining sequence-to-sequence modeling with various toxicity classification tasks, and (iii) delete and reconstruct approach. To support our research, we utilize a dataset provided by Dementieva et al. (2021), which contains multiple versions of detoxified texts corresponding to toxic texts. In our experiments, we selected the best variants through expert human annotators, creating a dataset where each toxic sentence is paired with a single, appropriate detoxified version. Additionally, we introduced a small Hindi parallel dataset, aligning with a part of the English dataset, suitable for evaluation purposes. Our results demonstrate that our approach effectively balances text detoxification while preserving the actual content and maintaining fluency.",
}
Markdown (Informal)

[Text Detoxification as Style Transfer in English and Hindi](https://aclanthology.org/2023.icon-1.13) (Mukherjee et al., ICON 2023)

ACL

Sourabrata Mukherjee, Akanksha Bansal, Atul Kr. Ojha, John P. McCrae, and Ondrej Dusek. 2023. Text Detoxification as Style Transfer in English and Hindi. In Proceedings of the 20th International Conference on Natural Language Processing (ICON), pages 133–144, Goa University, Goa, India. NLP Association of India (NLPAI).