@inproceedings{tschuggnall-etal-2019-reduce,
title = "Reduce {\&} Attribute: Two-Step Authorship Attribution for Large-Scale Problems",
author = {Tschuggnall, Michael and
Murauer, Benjamin and
Specht, G{\"u}nther},
editor = "Bansal, Mohit and
Villavicencio, Aline",
booktitle = "Proceedings of the 23rd Conference on Computational Natural Language Learning (CoNLL)",
month = nov,
year = "2019",
address = "Hong Kong, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/K19-1089",
doi = "10.18653/v1/K19-1089",
pages = "951--960",
abstract = "Authorship attribution is an active research area which has been prevalent for many decades. Nevertheless, the majority of approaches consider problem sizes of a few candidate authors only, making them difficult to apply to recent scenarios incorporating thousands of authors emerging due to the manifold means to digitally share text. In this study, we focus on such large-scale problems and propose to effectively reduce the number of candidate authors before applying common attribution techniques. By utilizing document embeddings, we show on a novel, comprehensive dataset collection that the set of candidate authors can be reduced with high accuracy. Moreover, we show that common authorship attribution methods substantially benefit from a preliminary reduction if thousands of authors are involved.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="tschuggnall-etal-2019-reduce">
<titleInfo>
<title>Reduce &amp; Attribute: Two-Step Authorship Attribution for Large-Scale Problems</title>
</titleInfo>
<name type="personal">
<namePart type="given">Michael</namePart>
<namePart type="family">Tschuggnall</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Benjamin</namePart>
<namePart type="family">Murauer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Günther</namePart>
<namePart type="family">Specht</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 23rd Conference on Computational Natural Language Learning (CoNLL)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aline</namePart>
<namePart type="family">Villavicencio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Hong Kong, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Authorship attribution is an active research area which has been prevalent for many decades. Nevertheless, the majority of approaches consider problem sizes of a few candidate authors only, making them difficult to apply to recent scenarios incorporating thousands of authors emerging due to the manifold means to digitally share text. In this study, we focus on such large-scale problems and propose to effectively reduce the number of candidate authors before applying common attribution techniques. By utilizing document embeddings, we show on a novel, comprehensive dataset collection that the set of candidate authors can be reduced with high accuracy. Moreover, we show that common authorship attribution methods substantially benefit from a preliminary reduction if thousands of authors are involved.</abstract>
<identifier type="citekey">tschuggnall-etal-2019-reduce</identifier>
<identifier type="doi">10.18653/v1/K19-1089</identifier>
<location>
<url>https://aclanthology.org/K19-1089</url>
</location>
<part>
<date>2019-11</date>
<extent unit="page">
<start>951</start>
<end>960</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Reduce & Attribute: Two-Step Authorship Attribution for Large-Scale Problems
%A Tschuggnall, Michael
%A Murauer, Benjamin
%A Specht, Günther
%Y Bansal, Mohit
%Y Villavicencio, Aline
%S Proceedings of the 23rd Conference on Computational Natural Language Learning (CoNLL)
%D 2019
%8 November
%I Association for Computational Linguistics
%C Hong Kong, China
%F tschuggnall-etal-2019-reduce
%X Authorship attribution is an active research area which has been prevalent for many decades. Nevertheless, the majority of approaches consider problem sizes of a few candidate authors only, making them difficult to apply to recent scenarios incorporating thousands of authors emerging due to the manifold means to digitally share text. In this study, we focus on such large-scale problems and propose to effectively reduce the number of candidate authors before applying common attribution techniques. By utilizing document embeddings, we show on a novel, comprehensive dataset collection that the set of candidate authors can be reduced with high accuracy. Moreover, we show that common authorship attribution methods substantially benefit from a preliminary reduction if thousands of authors are involved.
%R 10.18653/v1/K19-1089
%U https://aclanthology.org/K19-1089
%U https://doi.org/10.18653/v1/K19-1089
%P 951-960
Markdown (Informal)
[Reduce & Attribute: Two-Step Authorship Attribution for Large-Scale Problems](https://aclanthology.org/K19-1089) (Tschuggnall et al., CoNLL 2019)
ACL