@inproceedings{grzybowski-etal-2019-sparse,
title = "Sparse Coding in Authorship Attribution for {P}olish Tweets",
author = "Grzybowski, Piotr and
Juralewicz, Ewa and
Piasecki, Maciej",
editor = "Mitkov, Ruslan and
Angelova, Galia",
booktitle = "Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2019)",
month = sep,
year = "2019",
address = "Varna, Bulgaria",
publisher = "INCOMA Ltd.",
url = "https://aclanthology.org/R19-1048",
doi = "10.26615/978-954-452-056-4_048",
pages = "409--417",
abstract = "The study explores application of a simple Convolutional Neural Network for the problem of authorship attribution of tweets written in Polish. In our solution we use two-step compression of tweets using Byte Pair Encoding algorithm and vectorisation as an input to the distributional model generated for the large corpus of Polish tweets by word2vec algorithm. Our method achieves results comparable to the state-of-the-art approaches for the similar task on English tweets and expresses a very good performance in the classification of Polish tweets. We tested the proposed method in relation to the number of authors and tweets per author. We also juxtaposed results for authors with different topic backgrounds against each other.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="grzybowski-etal-2019-sparse">
<titleInfo>
<title>Sparse Coding in Authorship Attribution for Polish Tweets</title>
</titleInfo>
<name type="personal">
<namePart type="given">Piotr</namePart>
<namePart type="family">Grzybowski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ewa</namePart>
<namePart type="family">Juralewicz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maciej</namePart>
<namePart type="family">Piasecki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2019)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Galia</namePart>
<namePart type="family">Angelova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd.</publisher>
<place>
<placeTerm type="text">Varna, Bulgaria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The study explores application of a simple Convolutional Neural Network for the problem of authorship attribution of tweets written in Polish. In our solution we use two-step compression of tweets using Byte Pair Encoding algorithm and vectorisation as an input to the distributional model generated for the large corpus of Polish tweets by word2vec algorithm. Our method achieves results comparable to the state-of-the-art approaches for the similar task on English tweets and expresses a very good performance in the classification of Polish tweets. We tested the proposed method in relation to the number of authors and tweets per author. We also juxtaposed results for authors with different topic backgrounds against each other.</abstract>
<identifier type="citekey">grzybowski-etal-2019-sparse</identifier>
<identifier type="doi">10.26615/978-954-452-056-4_048</identifier>
<location>
<url>https://aclanthology.org/R19-1048</url>
</location>
<part>
<date>2019-09</date>
<extent unit="page">
<start>409</start>
<end>417</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Sparse Coding in Authorship Attribution for Polish Tweets
%A Grzybowski, Piotr
%A Juralewicz, Ewa
%A Piasecki, Maciej
%Y Mitkov, Ruslan
%Y Angelova, Galia
%S Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2019)
%D 2019
%8 September
%I INCOMA Ltd.
%C Varna, Bulgaria
%F grzybowski-etal-2019-sparse
%X The study explores application of a simple Convolutional Neural Network for the problem of authorship attribution of tweets written in Polish. In our solution we use two-step compression of tweets using Byte Pair Encoding algorithm and vectorisation as an input to the distributional model generated for the large corpus of Polish tweets by word2vec algorithm. Our method achieves results comparable to the state-of-the-art approaches for the similar task on English tweets and expresses a very good performance in the classification of Polish tweets. We tested the proposed method in relation to the number of authors and tweets per author. We also juxtaposed results for authors with different topic backgrounds against each other.
%R 10.26615/978-954-452-056-4_048
%U https://aclanthology.org/R19-1048
%U https://doi.org/10.26615/978-954-452-056-4_048
%P 409-417
Markdown (Informal)
[Sparse Coding in Authorship Attribution for Polish Tweets](https://aclanthology.org/R19-1048) (Grzybowski et al., RANLP 2019)
ACL