@inproceedings{simaki-etal-2017-identifying,
title = "Identifying the Authors{'} National Variety of {E}nglish in Social Media Texts",
author = "Simaki, Vasiliki and
Simakis, Panagiotis and
Paradis, Carita and
Kerren, Andreas",
editor = "Mitkov, Ruslan and
Angelova, Galia",
booktitle = "Proceedings of the International Conference Recent Advances in Natural Language Processing, {RANLP} 2017",
month = sep,
year = "2017",
address = "Varna, Bulgaria",
publisher = "INCOMA Ltd.",
url = "https://doi.org/10.26615/978-954-452-049-6_086",
doi = "10.26615/978-954-452-049-6_086",
pages = "671--678",
abstract = "In this paper, we present a study for the identification of authors{'} national variety of English in texts from social media. In data from Facebook and Twitter, information about the author{'}s social profile is annotated, and the national English variety (US, UK, AUS, CAN, NNS) that each author uses is attributed. We tested four feature types: formal linguistic features, POS features, lexicon-based features related to the different varieties, and data-based features from each English variety. We used various machine learning algorithms for the classification experiments, and we implemented a feature selection process. The classification accuracy achieved, when the 31 highest ranked features were used, was up to 77.32{\%}. The experimental results are evaluated, and the efficacy of the ranked features discussed.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="simaki-etal-2017-identifying">
<titleInfo>
<title>Identifying the Authors’ National Variety of English in Social Media Texts</title>
</titleInfo>
<name type="personal">
<namePart type="given">Vasiliki</namePart>
<namePart type="family">Simaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Panagiotis</namePart>
<namePart type="family">Simakis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carita</namePart>
<namePart type="family">Paradis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andreas</namePart>
<namePart type="family">Kerren</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the International Conference Recent Advances in Natural Language Processing, RANLP 2017</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Galia</namePart>
<namePart type="family">Angelova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd.</publisher>
<place>
<placeTerm type="text">Varna, Bulgaria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we present a study for the identification of authors’ national variety of English in texts from social media. In data from Facebook and Twitter, information about the author’s social profile is annotated, and the national English variety (US, UK, AUS, CAN, NNS) that each author uses is attributed. We tested four feature types: formal linguistic features, POS features, lexicon-based features related to the different varieties, and data-based features from each English variety. We used various machine learning algorithms for the classification experiments, and we implemented a feature selection process. The classification accuracy achieved, when the 31 highest ranked features were used, was up to 77.32%. The experimental results are evaluated, and the efficacy of the ranked features discussed.</abstract>
<identifier type="citekey">simaki-etal-2017-identifying</identifier>
<identifier type="doi">10.26615/978-954-452-049-6_086</identifier>
<part>
<date>2017-09</date>
<extent unit="page">
<start>671</start>
<end>678</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Identifying the Authors’ National Variety of English in Social Media Texts
%A Simaki, Vasiliki
%A Simakis, Panagiotis
%A Paradis, Carita
%A Kerren, Andreas
%Y Mitkov, Ruslan
%Y Angelova, Galia
%S Proceedings of the International Conference Recent Advances in Natural Language Processing, RANLP 2017
%D 2017
%8 September
%I INCOMA Ltd.
%C Varna, Bulgaria
%F simaki-etal-2017-identifying
%X In this paper, we present a study for the identification of authors’ national variety of English in texts from social media. In data from Facebook and Twitter, information about the author’s social profile is annotated, and the national English variety (US, UK, AUS, CAN, NNS) that each author uses is attributed. We tested four feature types: formal linguistic features, POS features, lexicon-based features related to the different varieties, and data-based features from each English variety. We used various machine learning algorithms for the classification experiments, and we implemented a feature selection process. The classification accuracy achieved, when the 31 highest ranked features were used, was up to 77.32%. The experimental results are evaluated, and the efficacy of the ranked features discussed.
%R 10.26615/978-954-452-049-6_086
%U https://doi.org/10.26615/978-954-452-049-6_086
%P 671-678
Markdown (Informal)
[Identifying the Authors’ National Variety of English in Social Media Texts](https://doi.org/10.26615/978-954-452-049-6_086) (Simaki et al., RANLP 2017)
ACL