@inproceedings{wu-etal-2018-decipherment,
title = "Decipherment for Adversarial Offensive Language Detection",
author = "Wu, Zhelun and
Kambhatla, Nishant and
Sarkar, Anoop",
editor = "Fi{\v{s}}er, Darja and
Huang, Ruihong and
Prabhakaran, Vinodkumar and
Voigt, Rob and
Waseem, Zeerak and
Wernimont, Jacqueline",
booktitle = "Proceedings of the 2nd Workshop on Abusive Language Online ({ALW}2)",
month = oct,
year = "2018",
address = "Brussels, Belgium",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W18-5119",
doi = "10.18653/v1/W18-5119",
pages = "149--159",
abstract = "Automated filters are commonly used by online services to stop users from sending age-inappropriate, bullying messages, or asking others to expose personal information. Previous work has focused on rules or classifiers to detect and filter offensive messages, but these are vulnerable to cleverly disguised plaintext and unseen expressions especially in an adversarial setting where the users can repeatedly try to bypass the filter. In this paper, we model the disguised messages as if they are produced by encrypting the original message using an invented cipher. We apply automatic decipherment techniques to decode the disguised malicious text, which can then be filtered using rules or classifiers. We provide experimental results on three different datasets and show that decipherment is an effective tool for this task.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wu-etal-2018-decipherment">
<titleInfo>
<title>Decipherment for Adversarial Offensive Language Detection</title>
</titleInfo>
<name type="personal">
<namePart type="given">Zhelun</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nishant</namePart>
<namePart type="family">Kambhatla</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anoop</namePart>
<namePart type="family">Sarkar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-10</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2nd Workshop on Abusive Language Online (ALW2)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Darja</namePart>
<namePart type="family">Fišer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruihong</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vinodkumar</namePart>
<namePart type="family">Prabhakaran</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rob</namePart>
<namePart type="family">Voigt</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zeerak</namePart>
<namePart type="family">Waseem</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jacqueline</namePart>
<namePart type="family">Wernimont</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Brussels, Belgium</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Automated filters are commonly used by online services to stop users from sending age-inappropriate, bullying messages, or asking others to expose personal information. Previous work has focused on rules or classifiers to detect and filter offensive messages, but these are vulnerable to cleverly disguised plaintext and unseen expressions especially in an adversarial setting where the users can repeatedly try to bypass the filter. In this paper, we model the disguised messages as if they are produced by encrypting the original message using an invented cipher. We apply automatic decipherment techniques to decode the disguised malicious text, which can then be filtered using rules or classifiers. We provide experimental results on three different datasets and show that decipherment is an effective tool for this task.</abstract>
<identifier type="citekey">wu-etal-2018-decipherment</identifier>
<identifier type="doi">10.18653/v1/W18-5119</identifier>
<location>
<url>https://aclanthology.org/W18-5119</url>
</location>
<part>
<date>2018-10</date>
<extent unit="page">
<start>149</start>
<end>159</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Decipherment for Adversarial Offensive Language Detection
%A Wu, Zhelun
%A Kambhatla, Nishant
%A Sarkar, Anoop
%Y Fišer, Darja
%Y Huang, Ruihong
%Y Prabhakaran, Vinodkumar
%Y Voigt, Rob
%Y Waseem, Zeerak
%Y Wernimont, Jacqueline
%S Proceedings of the 2nd Workshop on Abusive Language Online (ALW2)
%D 2018
%8 October
%I Association for Computational Linguistics
%C Brussels, Belgium
%F wu-etal-2018-decipherment
%X Automated filters are commonly used by online services to stop users from sending age-inappropriate, bullying messages, or asking others to expose personal information. Previous work has focused on rules or classifiers to detect and filter offensive messages, but these are vulnerable to cleverly disguised plaintext and unseen expressions especially in an adversarial setting where the users can repeatedly try to bypass the filter. In this paper, we model the disguised messages as if they are produced by encrypting the original message using an invented cipher. We apply automatic decipherment techniques to decode the disguised malicious text, which can then be filtered using rules or classifiers. We provide experimental results on three different datasets and show that decipherment is an effective tool for this task.
%R 10.18653/v1/W18-5119
%U https://aclanthology.org/W18-5119
%U https://doi.org/10.18653/v1/W18-5119
%P 149-159
Markdown (Informal)
[Decipherment for Adversarial Offensive Language Detection](https://aclanthology.org/W18-5119) (Wu et al., ALW 2018)
ACL