% Conference paper from RANLP 2019 (ACL Anthology ID R19-1158).
% Proper nouns and acronyms are brace-protected as whole words so that
% sentence-casing styles keep their capitalisation.
@inproceedings{yuksel-etal-2019-turkish,
  title     = {{Turkish} Tweet Classification with Transformer Encoder},
  author    = {Y{\"u}ksel, At{\i}f Emre and
               T{\"u}rkmen, Ya{\c{s}}ar Alim and
               {\"O}zg{\"u}r, Arzucan and
               Alt{\i}nel, Berna},
  editor    = {Mitkov, Ruslan and
               Angelova, Galia},
  booktitle = {Proceedings of the International Conference on Recent Advances in Natural Language Processing ({RANLP} 2019)},
  month     = sep,
  year      = {2019},
  address   = {Varna, Bulgaria},
  publisher = {INCOMA Ltd.},
  url       = {https://aclanthology.org/R19-1158/},
  doi       = {10.26615/978-954-452-056-4_158},
  pages     = {1380--1387},
  abstract  = {Short-text classification is a challenging task, due to the sparsity and high dimensionality of the feature space. In this study, we aim to analyze and classify Turkish tweets based on their topics. Social media jargon and the agglutinative structure of the Turkish language makes this classification task even harder. As far as we know, this is the first study that uses a Transformer Encoder for short text classification in Turkish. The model is trained in a weakly supervised manner, where the training data set has been labeled automatically. Our results on the test set, which has been manually labeled, show that performing morphological analysis improves the classification performance of the traditional machine learning algorithms Random Forest, Naive Bayes, and Support Vector Machines. Still, the proposed approach achieves an F-score of 89.3 {\%} outperforming those algorithms by at least 5 points.}
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the paper with citekey yuksel-etal-2019-turkish. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="yuksel-etal-2019-turkish">
    <titleInfo>
      <title>Turkish Tweet Classification with Transformer Encoder</title>
    </titleInfo>
    <!-- Authors, in publication order. -->
    <name type="personal">
      <namePart type="given">Atıf</namePart>
      <namePart type="given">Emre</namePart>
      <namePart type="family">Yüksel</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yaşar</namePart>
      <namePart type="given">Alim</namePart>
      <namePart type="family">Türkmen</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Arzucan</namePart>
      <namePart type="family">Özgür</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Berna</namePart>
      <namePart type="family">Altınel</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2019-09</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors and publisher. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2019)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Ruslan</namePart>
        <namePart type="family">Mitkov</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Galia</namePart>
        <namePart type="family">Angelova</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>INCOMA Ltd.</publisher>
        <place>
          <placeTerm type="text">Varna, Bulgaria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Short-text classification is a challenging task, due to the sparsity and high dimensionality of the feature space. In this study, we aim to analyze and classify Turkish tweets based on their topics. Social media jargon and the agglutinative structure of the Turkish language makes this classification task even harder. As far as we know, this is the first study that uses a Transformer Encoder for short text classification in Turkish. The model is trained in a weakly supervised manner, where the training data set has been labeled automatically. Our results on the test set, which has been manually labeled, show that performing morphological analysis improves the classification performance of the traditional machine learning algorithms Random Forest, Naive Bayes, and Support Vector Machines. Still, the proposed approach achieves an F-score of 89.3 % outperforming those algorithms by at least 5 points.</abstract>
    <!-- Identifiers and access location. -->
    <identifier type="citekey">yuksel-etal-2019-turkish</identifier>
    <identifier type="doi">10.26615/978-954-452-056-4_158</identifier>
    <location>
      <url>https://aclanthology.org/R19-1158/</url>
    </location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2019-09</date>
      <extent unit="page">
        <start>1380</start>
        <end>1387</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Turkish Tweet Classification with Transformer Encoder
%A Yüksel, Atıf Emre
%A Türkmen, Yaşar Alim
%A Özgür, Arzucan
%A Altınel, Berna
%Y Mitkov, Ruslan
%Y Angelova, Galia
%S Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2019)
%D 2019
%8 September
%I INCOMA Ltd.
%C Varna, Bulgaria
%F yuksel-etal-2019-turkish
%X Short-text classification is a challenging task, due to the sparsity and high dimensionality of the feature space. In this study, we aim to analyze and classify Turkish tweets based on their topics. Social media jargon and the agglutinative structure of the Turkish language makes this classification task even harder. As far as we know, this is the first study that uses a Transformer Encoder for short text classification in Turkish. The model is trained in a weakly supervised manner, where the training data set has been labeled automatically. Our results on the test set, which has been manually labeled, show that performing morphological analysis improves the classification performance of the traditional machine learning algorithms Random Forest, Naive Bayes, and Support Vector Machines. Still, the proposed approach achieves an F-score of 89.3 % outperforming those algorithms by at least 5 points.
%R 10.26615/978-954-452-056-4_158
%U https://aclanthology.org/R19-1158/
%U https://doi.org/10.26615/978-954-452-056-4_158
%P 1380-1387
Markdown (Informal)
[Turkish Tweet Classification with Transformer Encoder](https://aclanthology.org/R19-1158/) (Yüksel et al., RANLP 2019)
ACL
- Atıf Emre Yüksel, Yaşar Alim Türkmen, Arzucan Özgür, and Berna Altınel. 2019. Turkish Tweet Classification with Transformer Encoder. In Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2019), pages 1380–1387, Varna, Bulgaria. INCOMA Ltd.