@inproceedings{cieliebak-etal-2017-twitter,
title = "A {T}witter Corpus and Benchmark Resources for {G}erman Sentiment Analysis",
author = "Cieliebak, Mark and
Deriu, Jan Milan and
Egger, Dominic and
Uzdilli, Fatih",
editor = "Ku, Lun-Wei and
Li, Cheng-Te",
booktitle = "Proceedings of the Fifth International Workshop on Natural Language Processing for Social Media",
month = apr,
year = "2017",
address = "Valencia, Spain",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W17-1106/",
doi = "10.18653/v1/W17-1106",
pages = "45--51",
abstract = "In this paper we present SB10k, a new corpus for sentiment analysis with approx. 10,000 German tweets. We use this new corpus and two existing corpora to provide state-of-the-art benchmarks for sentiment analysis in German: we implemented a CNN (based on the winning system of SemEval-2016) and a feature-based SVM and compare their performance on all three corpora. For the CNN, we also created German word embeddings trained on 300M tweets. These word embeddings were then optimized for sentiment analysis using distant-supervised learning. The new corpus, the German word embeddings (plain and optimized), and source code to re-run the benchmarks are publicly available."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="cieliebak-etal-2017-twitter">
<titleInfo>
<title>A Twitter Corpus and Benchmark Resources for German Sentiment Analysis</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mark</namePart>
<namePart type="family">Cieliebak</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="given">Milan</namePart>
<namePart type="family">Deriu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dominic</namePart>
<namePart type="family">Egger</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fatih</namePart>
<namePart type="family">Uzdilli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth International Workshop on Natural Language Processing for Social Media</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lun-Wei</namePart>
<namePart type="family">Ku</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cheng-Te</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Valencia, Spain</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper we present SB10k, a new corpus for sentiment analysis with approx. 10,000 German tweets. We use this new corpus and two existing corpora to provide state-of-the-art benchmarks for sentiment analysis in German: we implemented a CNN (based on the winning system of SemEval-2016) and a feature-based SVM and compare their performance on all three corpora. For the CNN, we also created German word embeddings trained on 300M tweets. These word embeddings were then optimized for sentiment analysis using distant-supervised learning. The new corpus, the German word embeddings (plain and optimized), and source code to re-run the benchmarks are publicly available.</abstract>
<identifier type="citekey">cieliebak-etal-2017-twitter</identifier>
<identifier type="doi">10.18653/v1/W17-1106</identifier>
<location>
<url>https://aclanthology.org/W17-1106/</url>
</location>
<part>
<date>2017-04</date>
<extent unit="page">
<start>45</start>
<end>51</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Twitter Corpus and Benchmark Resources for German Sentiment Analysis
%A Cieliebak, Mark
%A Deriu, Jan Milan
%A Egger, Dominic
%A Uzdilli, Fatih
%Y Ku, Lun-Wei
%Y Li, Cheng-Te
%S Proceedings of the Fifth International Workshop on Natural Language Processing for Social Media
%D 2017
%8 April
%I Association for Computational Linguistics
%C Valencia, Spain
%F cieliebak-etal-2017-twitter
%X In this paper we present SB10k, a new corpus for sentiment analysis with approx. 10,000 German tweets. We use this new corpus and two existing corpora to provide state-of-the-art benchmarks for sentiment analysis in German: we implemented a CNN (based on the winning system of SemEval-2016) and a feature-based SVM and compare their performance on all three corpora. For the CNN, we also created German word embeddings trained on 300M tweets. These word embeddings were then optimized for sentiment analysis using distant-supervised learning. The new corpus, the German word embeddings (plain and optimized), and source code to re-run the benchmarks are publicly available.
%R 10.18653/v1/W17-1106
%U https://aclanthology.org/W17-1106/
%U https://doi.org/10.18653/v1/W17-1106
%P 45-51
Markdown (Informal)
[A Twitter Corpus and Benchmark Resources for German Sentiment Analysis](https://aclanthology.org/W17-1106/) (Cieliebak et al., SocialNLP 2017)
ACL