@inproceedings{yang-lin-2020-tocp,
title = "{TOCP}: A Dataset for {C}hinese Profanity Processing",
author = "Yang, Hsu and
Lin, Chuan-Jie",
editor = "Kumar, Ritesh and
Ojha, Atul Kr. and
Lahiri, Bornini and
Zampieri, Marcos and
Malmasi, Shervin and
Murdock, Vanessa and
Kadar, Daniel",
booktitle = "Proceedings of the Second Workshop on Trolling, Aggression and Cyberbullying",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association (ELRA)",
url = "https://aclanthology.org/2020.trac-1.2",
pages = "6--12",
abstract = "This paper introduced TOCP, a larger dataset of Chinese profanity. This dataset contains natural sentences collected from social media sites, the profane expressions appearing in the sentences, and their rephrasing suggestions which preserve their meanings in a less offensive way. We proposed several baseline systems using neural network models to test this benchmark. We trained embedding models on a profanity-related dataset and proposed several profanity-related features. Our baseline systems achieved an F1-score of 86.37{\%} in profanity detection and an accuracy of 77.32{\%} in profanity rephrasing.",
language = "English",
ISBN = "979-10-95546-56-6",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="yang-lin-2020-tocp">
<titleInfo>
<title>TOCP: A Dataset for Chinese Profanity Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hsu</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chuan-Jie</namePart>
<namePart type="family">Lin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Second Workshop on Trolling, Aggression and Cyberbullying</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ritesh</namePart>
<namePart type="family">Kumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Atul</namePart>
<namePart type="given">Kr.</namePart>
<namePart type="family">Ojha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bornini</namePart>
<namePart type="family">Lahiri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcos</namePart>
<namePart type="family">Zampieri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shervin</namePart>
<namePart type="family">Malmasi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vanessa</namePart>
<namePart type="family">Murdock</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Kadar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-56-6</identifier>
</relatedItem>
<abstract>This paper introduced TOCP, a larger dataset of Chinese profanity. This dataset contains natural sentences collected from social media sites, the profane expressions appearing in the sentences, and their rephrasing suggestions which preserve their meanings in a less offensive way. We proposed several baseline systems using neural network models to test this benchmark. We trained embedding models on a profanity-related dataset and proposed several profanity-related features. Our baseline systems achieved an F1-score of 86.37% in profanity detection and an accuracy of 77.32% in profanity rephrasing.</abstract>
<identifier type="citekey">yang-lin-2020-tocp</identifier>
<location>
<url>https://aclanthology.org/2020.trac-1.2</url>
</location>
<part>
<date>2020-05</date>
<extent unit="page">
<start>6</start>
<end>12</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T TOCP: A Dataset for Chinese Profanity Processing
%A Yang, Hsu
%A Lin, Chuan-Jie
%Y Kumar, Ritesh
%Y Ojha, Atul Kr.
%Y Lahiri, Bornini
%Y Zampieri, Marcos
%Y Malmasi, Shervin
%Y Murdock, Vanessa
%Y Kadar, Daniel
%S Proceedings of the Second Workshop on Trolling, Aggression and Cyberbullying
%D 2020
%8 May
%I European Language Resources Association (ELRA)
%C Marseille, France
%@ 979-10-95546-56-6
%G English
%F yang-lin-2020-tocp
%X This paper introduced TOCP, a larger dataset of Chinese profanity. This dataset contains natural sentences collected from social media sites, the profane expressions appearing in the sentences, and their rephrasing suggestions which preserve their meanings in a less offensive way. We proposed several baseline systems using neural network models to test this benchmark. We trained embedding models on a profanity-related dataset and proposed several profanity-related features. Our baseline systems achieved an F1-score of 86.37% in profanity detection and an accuracy of 77.32% in profanity rephrasing.
%U https://aclanthology.org/2020.trac-1.2
%P 6-12
Markdown (Informal)
[TOCP: A Dataset for Chinese Profanity Processing](https://aclanthology.org/2020.trac-1.2) (Yang & Lin, TRAC 2020)
ACL
- Hsu Yang and Chuan-Jie Lin. 2020. TOCP: A Dataset for Chinese Profanity Processing. In Proceedings of the Second Workshop on Trolling, Aggression and Cyberbullying, pages 6–12, Marseille, France. European Language Resources Association (ELRA).