@inproceedings{chernodub-etal-2023-detexd,
title = "{D}e{T}ex{D}: A Benchmark Dataset for Delicate Text Detection",
author = "Yavnyi, Serhii and
Sliusarenko, Oleksii and
Razzaghi, Jade and
Nahorna, Olena and
Mo, Yichen and
Hovakimyan, Knar and
Chernodub, Artem",
editor = {Chung, Yi-ling and
R{\"o}ttger, Paul and
Nozza, Debora and
Talat, Zeerak and
Mostafazadeh Davani, Aida},
booktitle = "The 7th Workshop on Online Abuse and Harms (WOAH)",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.woah-1.2",
doi = "10.18653/v1/2023.woah-1.2",
pages = "14--28",
abstract = "Over the past few years, much research has been conducted to identify and regulate toxic language. However, few studies have addressed a broader range of sensitive texts that are not necessarily overtly toxic. In this paper, we introduce and define a new category of sensitive text called {``}delicate text.{''} We provide the taxonomy of delicate text and present a detailed annotation scheme. We annotate DeTexD, the first benchmark dataset for delicate text detection. The significance of the difference in the definitions is highlighted by the relative performance deltas between models trained each definitions and corpora and evaluated on the other. We make publicly available the DeTexD Benchmark dataset, annotation guidelines, and baseline model for delicate text detection.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="chernodub-etal-2023-detexd">
<titleInfo>
<title>DeTexD: A Benchmark Dataset for Delicate Text Detection</title>
</titleInfo>
<name type="personal">
<namePart type="given">Serhii</namePart>
<namePart type="family">Yavnyi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Oleksii</namePart>
<namePart type="family">Sliusarenko</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jade</namePart>
<namePart type="family">Razzaghi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Olena</namePart>
<namePart type="family">Nahorna</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yichen</namePart>
<namePart type="family">Mo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Knar</namePart>
<namePart type="family">Hovakimyan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Artem</namePart>
<namePart type="family">Chernodub</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>The 7th Workshop on Online Abuse and Harms (WOAH)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yi-ling</namePart>
<namePart type="family">Chung</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Paul</namePart>
<namePart type="family">Röttger</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Debora</namePart>
<namePart type="family">Nozza</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zeerak</namePart>
<namePart type="family">Talat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aida</namePart>
<namePart type="family">Mostafazadeh Davani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Over the past few years, much research has been conducted to identify and regulate toxic language. However, few studies have addressed a broader range of sensitive texts that are not necessarily overtly toxic. In this paper, we introduce and define a new category of sensitive text called “delicate text.” We provide the taxonomy of delicate text and present a detailed annotation scheme. We annotate DeTexD, the first benchmark dataset for delicate text detection. The significance of the difference in the definitions is highlighted by the relative performance deltas between models trained each definitions and corpora and evaluated on the other. We make publicly available the DeTexD Benchmark dataset, annotation guidelines, and baseline model for delicate text detection.</abstract>
<identifier type="citekey">chernodub-etal-2023-detexd</identifier>
<identifier type="doi">10.18653/v1/2023.woah-1.2</identifier>
<location>
<url>https://aclanthology.org/2023.woah-1.2</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>14</start>
<end>28</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T DeTexD: A Benchmark Dataset for Delicate Text Detection
%A Yavnyi, Serhii
%A Sliusarenko, Oleksii
%A Razzaghi, Jade
%A Nahorna, Olena
%A Mo, Yichen
%A Hovakimyan, Knar
%A Chernodub, Artem
%Y Chung, Yi-ling
%Y Röttger, Paul
%Y Nozza, Debora
%Y Talat, Zeerak
%Y Mostafazadeh Davani, Aida
%S The 7th Workshop on Online Abuse and Harms (WOAH)
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F chernodub-etal-2023-detexd
%X Over the past few years, much research has been conducted to identify and regulate toxic language. However, few studies have addressed a broader range of sensitive texts that are not necessarily overtly toxic. In this paper, we introduce and define a new category of sensitive text called “delicate text.” We provide the taxonomy of delicate text and present a detailed annotation scheme. We annotate DeTexD, the first benchmark dataset for delicate text detection. The significance of the difference in the definitions is highlighted by the relative performance deltas between models trained each definitions and corpora and evaluated on the other. We make publicly available the DeTexD Benchmark dataset, annotation guidelines, and baseline model for delicate text detection.
%R 10.18653/v1/2023.woah-1.2
%U https://aclanthology.org/2023.woah-1.2
%U https://doi.org/10.18653/v1/2023.woah-1.2
%P 14-28
Markdown (Informal)
[DeTexD: A Benchmark Dataset for Delicate Text Detection](https://aclanthology.org/2023.woah-1.2) (Yavnyi et al., WOAH 2023)
ACL
- Serhii Yavnyi, Oleksii Sliusarenko, Jade Razzaghi, Olena Nahorna, Yichen Mo, Knar Hovakimyan, and Artem Chernodub. 2023. DeTexD: A Benchmark Dataset for Delicate Text Detection. In The 7th Workshop on Online Abuse and Harms (WOAH), pages 14–28, Toronto, Canada. Association for Computational Linguistics.