@inproceedings{bevendorff-etal-2019-heuristic,
title = "Heuristic Authorship Obfuscation",
author = "Bevendorff, Janek and
Potthast, Martin and
Hagen, Matthias and
Stein, Benno",
editor = "Korhonen, Anna and
Traum, David and
M{\`a}rquez, Llu{\'\i}s",
booktitle = "Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics",
month = jul,
year = "2019",
address = "Florence, Italy",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/P19-1104",
doi = "10.18653/v1/P19-1104",
pages = "1098--1108",
abstract = "Authorship verification is the task of determining whether two texts were written by the same author. We deal with the adversary task, called authorship obfuscation: preventing verification by altering a to-be-obfuscated text. Our new obfuscation approach (1) models writing style difference as the Jensen-Shannon distance between the character n-gram distributions of texts, and (2) manipulates an author{'}s subconsciously encoded writing style in a sophisticated manner using heuristic search. To obfuscate, we analyze the huge space of textual variants for a paraphrased version of the to-be-obfuscated text that has a sufficient Jensen-Shannon distance at minimal costs in terms of text quality. We analyze, quantify, and illustrate the rationale of this approach, define paraphrasing operators, derive obfuscation thresholds, and develop an effective obfuscation framework. Our authorship obfuscation approach defeats state-of-the-art verification approaches, including unmasking and compression models, while keeping text changes at a minimum.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bevendorff-etal-2019-heuristic">
<titleInfo>
<title>Heuristic Authorship Obfuscation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Janek</namePart>
<namePart type="family">Bevendorff</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Martin</namePart>
<namePart type="family">Potthast</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matthias</namePart>
<namePart type="family">Hagen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Benno</namePart>
<namePart type="family">Stein</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Korhonen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Traum</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lluís</namePart>
<namePart type="family">Màrquez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Florence, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Authorship verification is the task of determining whether two texts were written by the same author. We deal with the adversary task, called authorship obfuscation: preventing verification by altering a to-be-obfuscated text. Our new obfuscation approach (1) models writing style difference as the Jensen-Shannon distance between the character n-gram distributions of texts, and (2) manipulates an author’s subconsciously encoded writing style in a sophisticated manner using heuristic search. To obfuscate, we analyze the huge space of textual variants for a paraphrased version of the to-be-obfuscated text that has a sufficient Jensen-Shannon distance at minimal costs in terms of text quality. We analyze, quantify, and illustrate the rationale of this approach, define paraphrasing operators, derive obfuscation thresholds, and develop an effective obfuscation framework. Our authorship obfuscation approach defeats state-of-the-art verification approaches, including unmasking and compression models, while keeping text changes at a minimum.</abstract>
<identifier type="citekey">bevendorff-etal-2019-heuristic</identifier>
<identifier type="doi">10.18653/v1/P19-1104</identifier>
<location>
<url>https://aclanthology.org/P19-1104</url>
</location>
<part>
<date>2019-07</date>
<extent unit="page">
<start>1098</start>
<end>1108</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Heuristic Authorship Obfuscation
%A Bevendorff, Janek
%A Potthast, Martin
%A Hagen, Matthias
%A Stein, Benno
%Y Korhonen, Anna
%Y Traum, David
%Y Màrquez, Lluís
%S Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics
%D 2019
%8 July
%I Association for Computational Linguistics
%C Florence, Italy
%F bevendorff-etal-2019-heuristic
%X Authorship verification is the task of determining whether two texts were written by the same author. We deal with the adversary task, called authorship obfuscation: preventing verification by altering a to-be-obfuscated text. Our new obfuscation approach (1) models writing style difference as the Jensen-Shannon distance between the character n-gram distributions of texts, and (2) manipulates an author’s subconsciously encoded writing style in a sophisticated manner using heuristic search. To obfuscate, we analyze the huge space of textual variants for a paraphrased version of the to-be-obfuscated text that has a sufficient Jensen-Shannon distance at minimal costs in terms of text quality. We analyze, quantify, and illustrate the rationale of this approach, define paraphrasing operators, derive obfuscation thresholds, and develop an effective obfuscation framework. Our authorship obfuscation approach defeats state-of-the-art verification approaches, including unmasking and compression models, while keeping text changes at a minimum.
%R 10.18653/v1/P19-1104
%U https://aclanthology.org/P19-1104
%U https://doi.org/10.18653/v1/P19-1104
%P 1098-1108
Markdown (Informal)
[Heuristic Authorship Obfuscation](https://aclanthology.org/P19-1104) (Bevendorff et al., ACL 2019)
ACL
- Janek Bevendorff, Martin Potthast, Matthias Hagen, and Benno Stein. 2019. Heuristic Authorship Obfuscation. In Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics, pages 1098–1108, Florence, Italy. Association for Computational Linguistics.