% Conference paper from the RANLP 2019 proceedings (ACL Anthology R19-1062).
@inproceedings{joshi-zincir-heywood-2019-classification,
  title     = {Classification of Micro-Texts Using Sub-Word Embeddings},
  author    = {Joshi, Mihir and Zincir-Heywood, Nur},
  editor    = {Mitkov, Ruslan and Angelova, Galia},
  booktitle = {Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2019)},
  month     = sep,
  year      = {2019},
  address   = {Varna, Bulgaria},
  publisher = {INCOMA Ltd.},
  url       = {https://aclanthology.org/R19-1062},
  doi       = {10.26615/978-954-452-056-4_062},
  pages     = {526--533},
  abstract  = {Extracting features and writing styles from short text messages is always a challenge. Short messages, like tweets, do not have enough data to perform statistical authorship attribution. Besides, the vocabulary used in these texts is sometimes improvised or misspelled. Therefore, in this paper, we propose combining four feature extraction techniques namely character n-grams, word n-grams, Flexible Patterns and a new sub-word embedding using the skip-gram model. Our system uses a Multi-Layer Perceptron to utilize these features from tweets to analyze short text messages. This proposed system achieves 85{\%} accuracy, which is a considerable improvement over previous systems.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS record for citekey joshi-zincir-heywood-2019-classification.
     The relatedItem of type "host" describes the proceedings volume (title, editors, publisher, place). -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="joshi-zincir-heywood-2019-classification">
    <titleInfo>
      <title>Classification of Micro-Texts Using Sub-Word Embeddings</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Mihir</namePart>
      <namePart type="family">Joshi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Nur</namePart>
      <namePart type="family">Zincir-Heywood</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2019-09</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2019)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Ruslan</namePart>
        <namePart type="family">Mitkov</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Galia</namePart>
        <namePart type="family">Angelova</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>INCOMA Ltd.</publisher>
        <place>
          <placeTerm type="text">Varna, Bulgaria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Extracting features and writing styles from short text messages is always a challenge. Short messages, like tweets, do not have enough data to perform statistical authorship attribution. Besides, the vocabulary used in these texts is sometimes improvised or misspelled. Therefore, in this paper, we propose combining four feature extraction techniques namely character n-grams, word n-grams, Flexible Patterns and a new sub-word embedding using the skip-gram model. Our system uses a Multi-Layer Perceptron to utilize these features from tweets to analyze short text messages. This proposed system achieves 85% accuracy, which is a considerable improvement over previous systems.</abstract>
    <identifier type="citekey">joshi-zincir-heywood-2019-classification</identifier>
    <identifier type="doi">10.26615/978-954-452-056-4_062</identifier>
    <location>
      <url>https://aclanthology.org/R19-1062</url>
    </location>
    <part>
      <date>2019-09</date>
      <extent unit="page">
        <start>526</start>
        <end>533</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Classification of Micro-Texts Using Sub-Word Embeddings
%A Joshi, Mihir
%A Zincir-Heywood, Nur
%Y Mitkov, Ruslan
%Y Angelova, Galia
%S Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2019)
%D 2019
%8 September
%I INCOMA Ltd.
%C Varna, Bulgaria
%F joshi-zincir-heywood-2019-classification
%X Extracting features and writing styles from short text messages is always a challenge. Short messages, like tweets, do not have enough data to perform statistical authorship attribution. Besides, the vocabulary used in these texts is sometimes improvised or misspelled. Therefore, in this paper, we propose combining four feature extraction techniques namely character n-grams, word n-grams, Flexible Patterns and a new sub-word embedding using the skip-gram model. Our system uses a Multi-Layer Perceptron to utilize these features from tweets to analyze short text messages. This proposed system achieves 85% accuracy, which is a considerable improvement over previous systems.
%R 10.26615/978-954-452-056-4_062
%U https://aclanthology.org/R19-1062
%U https://doi.org/10.26615/978-954-452-056-4_062
%P 526-533
Markdown (Informal)
[Classification of Micro-Texts Using Sub-Word Embeddings](https://aclanthology.org/R19-1062) (Joshi & Zincir-Heywood, RANLP 2019)
ACL