% Workshop paper presenting the SB10k German tweet sentiment corpus and
% benchmark systems (see abstract). The citation key is kept as-is so
% existing \cite commands continue to resolve.
@inproceedings{cieliebak-EtAl:2017:SocialNLP2017,
  author    = {Cieliebak, Mark and Deriu, Jan Milan and Egger, Dominic and Uzdilli, Fatih},
  title     = {A {Twitter} Corpus and Benchmark Resources for {German} Sentiment Analysis},
  booktitle = {Proceedings of the Fifth International Workshop on Natural Language Processing for Social Media},
  month     = apr,
  year      = {2017},
  address   = {Valencia, Spain},
  publisher = {Association for Computational Linguistics},
  pages     = {45--51},
  doi       = {10.18653/v1/W17-1106},
  url       = {https://aclanthology.org/W17-1106},
  abstract  = {In this paper we present SB10k, a new corpus for sentiment
    analysis with approx. 10,000 German tweets. We use this new corpus and
    two existing corpora to provide state-of-the-art benchmarks for
    sentiment analysis in German: we implemented a CNN (based on the
    winning system of SemEval-2016) and a feature-based SVM and compare
    their performance on all three corpora. For the CNN, we also created
    German word embeddings trained on 300M tweets. These word embeddings
    were then optimized for sentiment analysis using distant-supervised
    learning. The new corpus, the German word embeddings (plain and
    optimized), and source code to re-run the benchmarks are publicly
    available.},
}

