<?xml version="1.0" encoding="UTF-8" ?>
<volume id="W17">
  <paper id="1100">
    <title>Proceedings of the Fifth International Workshop on Natural Language Processing for Social Media</title>
    <editor>Lun-Wei Ku</editor>
    <editor>Cheng-Te Li</editor>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <url>http://www.aclweb.org/anthology/W17-11</url>
    <bibtype>book</bibtype>
    <bibkey>SocialNLP2017:2017</bibkey>
  </paper>

  <paper id="1101">
    <title>A Survey on Hate Speech Detection using Natural Language Processing</title>
    <author><first>Anna</first><last>Schmidt</last></author>
    <author><first>Michael</first><last>Wiegand</last></author>
    <booktitle>Proceedings of the Fifth International Workshop on Natural Language Processing for Social Media</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>1&#8211;10</pages>
    <url>http://www.aclweb.org/anthology/W17-1101</url>
    <abstract>This paper presents a survey on hate speech detection. Given the steadily
	growing body of social media content, the amount of online hate speech is also
	increasing. Due to the massive scale of the web, methods that automatically
	detect hate speech are required. Our survey describes key areas that have been
	explored to automatically recognize these types of utterances using natural
	language processing. We also discuss limits of those approaches.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>schmidt-wiegand:2017:SocialNLP2017</bibkey>
  </paper>

  <paper id="1102">
    <title>Facebook sentiment: Reactions and Emojis</title>
    <author><first>Ye</first><last>Tian</last></author>
    <author><first>Thiago</first><last>Galery</last></author>
    <author><first>Giulio</first><last>Dulcinati</last></author>
    <author><first>Emilia</first><last>Molimpakis</last></author>
    <author><first>Chao</first><last>Sun</last></author>
    <booktitle>Proceedings of the Fifth International Workshop on Natural Language Processing for Social Media</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>11&#8211;16</pages>
    <url>http://www.aclweb.org/anthology/W17-1102</url>
    <abstract>Emojis are used frequently in social media. A widely assumed view is that
	emojis express the emotional state of the user, which has led to research
	focusing on the expressiveness of emojis independent from the linguistic
	context. We argue that emojis and the linguistic texts can modify the meaning
	of each other. The overall communicated meaning is not a simple sum of the two
	channels. 
	In order to study the meaning interplay, we need data indicating the overall
	sentiment of the entire message as well as the sentiment of the emojis
	stand-alone. We propose that Facebook Reactions are a good data source for such
	a purpose. FB reactions (e.g. &#x201c;Love&#x201d; and &#x201c;Angry&#x201d;) indicate the readers'
	overall sentiment, against which we can investigate the types of emojis used
	in the comments under different reaction profiles. We present a data set of 21,000
	FB posts (57 million reactions and 8 million comments) from public media pages
	across four countries.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>tian-EtAl:2017:SocialNLP2017</bibkey>
  </paper>

  <paper id="1103">
    <title>Potential and Limitations of Cross-Domain Sentiment Classification</title>
    <author><first>Jan Milan</first><last>Deriu</last></author>
    <author><first>Martin</first><last>Weilenmann</last></author>
    <author><first>Dirk</first><last>Von Gruenigen</last></author>
    <author><first>Mark</first><last>Cieliebak</last></author>
    <booktitle>Proceedings of the Fifth International Workshop on Natural Language Processing for Social Media</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>17&#8211;24</pages>
    <url>http://www.aclweb.org/anthology/W17-1103</url>
    <abstract>In this paper we investigate the cross-domain performance of a current
	state-of-the-art sentiment analysis system. For this purpose we train a
	convolutional neural network (CNN) on data from different domains and evaluate
	its performance on other domains. Furthermore, we evaluate the usefulness of
	combining a large number of different smaller annotated corpora into a large
	corpus. Our results show that more sophisticated approaches are required to
	train a system that works equally well on various domains.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>deriu-EtAl:2017:SocialNLP2017</bibkey>
  </paper>

  <paper id="1104">
    <title>Aligning Entity Names with Online Aliases on Twitter</title>
    <author><first>Kevin</first><last>McKelvey</last></author>
    <author><first>Peter</first><last>Goutzounis</last></author>
    <author><first>Stephen</first><last>da Cruz</last></author>
    <author><first>Nathanael</first><last>Chambers</last></author>
    <booktitle>Proceedings of the Fifth International Workshop on Natural Language Processing for Social Media</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>25&#8211;35</pages>
    <url>http://www.aclweb.org/anthology/W17-1104</url>
    <abstract>This paper presents new models that automatically align online aliases with
	their real entity names. Many research applications rely on identifying entity
	names in text, but people often refer to entities with unexpected nicknames and
	aliases. For example, The King and King James are aliases for Lebron James, a
	professional basketball player. Recent work on entity linking attempts to
	resolve mentions to knowledge base entries, like a Wikipedia page, but linking
	is unfortunately limited to well-known entities with pre-built pages. This
	paper asks a more basic question: can aliases be aligned without background
	knowledge of the entity? Further, can the semantics surrounding alias mentions
	be used to inform alignments? We describe statistical models that make
	decisions based on the lexicographic properties of the aliases with their
	semantic context in a large corpus of tweets. We experiment on a database of
	Twitter users and their usernames, and present the first human evaluation for
	this task. Alignment accuracy approaches human performance at 81%, and we show
	that while lexicographic features are most important, the semantic context of
	an alias further improves classification accuracy.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>mckelvey-EtAl:2017:SocialNLP2017</bibkey>
  </paper>

  <paper id="1105">
    <title>Character-based Neural Embeddings for Tweet Clustering</title>
    <author><first>Svitlana</first><last>Vakulenko</last></author>
    <author><first>Lyndon</first><last>Nixon</last></author>
    <author><first>Mihai</first><last>Lupu</last></author>
    <booktitle>Proceedings of the Fifth International Workshop on Natural Language Processing for Social Media</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>36&#8211;44</pages>
    <url>http://www.aclweb.org/anthology/W17-1105</url>
    <abstract>In this paper we show how the performance of tweet clustering can be improved
	by leveraging character-based neural networks. The proposed approach overcomes
	the limitations related to the vocabulary explosion in the word-based models
	and allows for the seamless processing of the multilingual content. Our
	evaluation results and code are available on-line:
	https://github.com/vendi12/tweet2vec_clustering.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>vakulenko-nixon-lupu:2017:SocialNLP2017</bibkey>
  </paper>

  <paper id="1106">
    <title>A Twitter Corpus and Benchmark Resources for German Sentiment Analysis</title>
    <author><first>Mark</first><last>Cieliebak</last></author>
    <author><first>Jan Milan</first><last>Deriu</last></author>
    <author><first>Dominic</first><last>Egger</last></author>
    <author><first>Fatih</first><last>Uzdilli</last></author>
    <booktitle>Proceedings of the Fifth International Workshop on Natural Language Processing for Social Media</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>45&#8211;51</pages>
    <url>http://www.aclweb.org/anthology/W17-1106</url>
    <abstract>In this paper we present SB10k, a new corpus for sentiment analysis with
	approx. 10,000 German tweets. 
	We use this new corpus and two existing corpora to provide state-of-the-art
	benchmarks for sentiment analysis in German: we implemented a CNN (based on the
	winning system of SemEval-2016) and a feature-based SVM and compare their
	performance on all three corpora. 
	For the CNN, we also created German word embeddings trained on 300M tweets.
	These word embeddings were then optimized for sentiment analysis using
	distant-supervised learning.
	The new corpus, the German word embeddings (plain and optimized), and 
	source code to re-run the benchmarks are publicly available.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>cieliebak-EtAl:2017:SocialNLP2017</bibkey>
  </paper>

</volume>

