@inproceedings{saroufim-etal-2018-language,
title = "Language Independent Sentiment Analysis with Sentiment-Specific Word Embeddings",
author = "Saroufim, Carl and
Almatarky, Akram and
Abdel Hady, Mohammad",
editor = "Balahur, Alexandra and
Mohammad, Saif M. and
Hoste, Veronique and
Klinger, Roman",
booktitle = "Proceedings of the 9th Workshop on Computational Approaches to Subjectivity, Sentiment and Social Media Analysis",
month = oct,
year = "2018",
address = "Brussels, Belgium",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W18-6204",
doi = "10.18653/v1/W18-6204",
pages = "14--23",
abstract = "Data annotation is a critical step to train a text model but it is tedious, expensive and time-consuming. We present a language independent method to train a sentiment polarity model with limited amount of manually-labeled data. Word embeddings such as Word2Vec are efficient at incorporating semantic and syntactic properties of words, yielding good results for document classification. However, these embeddings might map words with opposite polarities, to vectors close to each other. We train Sentiment Specific Word Embeddings (SSWE) on top of an unsupervised Word2Vec model, using either Recurrent Neural Networks (RNN) or Convolutional Neural Networks (CNN) on data auto-labeled as {``}Positive{''} or {``}Negative{''}. For this task, we rely on the universality of emojis and emoticons to auto-label a large number of French tweets using a small set of positive and negative emojis and emoticons. Finally, we apply a transfer learning approach to refine the network weights with a small-size manually-labeled training data set. Experiments are conducted to evaluate the performance of this approach on French sentiment classification using benchmark data sets from SemEval 2016 competition. We were able to achieve a performance improvement by using SSWE over Word2Vec. We also used a graph-based approach for label propagation to auto-generate a sentiment lexicon.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="saroufim-etal-2018-language">
<titleInfo>
<title>Language Independent Sentiment Analysis with Sentiment-Specific Word Embeddings</title>
</titleInfo>
<name type="personal">
<namePart type="given">Carl</namePart>
<namePart type="family">Saroufim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Akram</namePart>
<namePart type="family">Almatarky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="family">Abdel Hady</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-10</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 9th Workshop on Computational Approaches to Subjectivity, Sentiment and Social Media Analysis</title>
</titleInfo>
<name type="personal">
<namePart type="given">Alexandra</namePart>
<namePart type="family">Balahur</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Saif</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Mohammad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronique</namePart>
<namePart type="family">Hoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Roman</namePart>
<namePart type="family">Klinger</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Brussels, Belgium</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Data annotation is a critical step to train a text model but it is tedious, expensive and time-consuming. We present a language independent method to train a sentiment polarity model with limited amount of manually-labeled data. Word embeddings such as Word2Vec are efficient at incorporating semantic and syntactic properties of words, yielding good results for document classification. However, these embeddings might map words with opposite polarities, to vectors close to each other. We train Sentiment Specific Word Embeddings (SSWE) on top of an unsupervised Word2Vec model, using either Recurrent Neural Networks (RNN) or Convolutional Neural Networks (CNN) on data auto-labeled as “Positive” or “Negative”. For this task, we rely on the universality of emojis and emoticons to auto-label a large number of French tweets using a small set of positive and negative emojis and emoticons. Finally, we apply a transfer learning approach to refine the network weights with a small-size manually-labeled training data set. Experiments are conducted to evaluate the performance of this approach on French sentiment classification using benchmark data sets from SemEval 2016 competition. We were able to achieve a performance improvement by using SSWE over Word2Vec. We also used a graph-based approach for label propagation to auto-generate a sentiment lexicon.</abstract>
<identifier type="citekey">saroufim-etal-2018-language</identifier>
<identifier type="doi">10.18653/v1/W18-6204</identifier>
<location>
<url>https://aclanthology.org/W18-6204</url>
</location>
<part>
<date>2018-10</date>
<extent unit="page">
<start>14</start>
<end>23</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Language Independent Sentiment Analysis with Sentiment-Specific Word Embeddings
%A Saroufim, Carl
%A Almatarky, Akram
%A Abdel Hady, Mohammad
%Y Balahur, Alexandra
%Y Mohammad, Saif M.
%Y Hoste, Veronique
%Y Klinger, Roman
%S Proceedings of the 9th Workshop on Computational Approaches to Subjectivity, Sentiment and Social Media Analysis
%D 2018
%8 October
%I Association for Computational Linguistics
%C Brussels, Belgium
%F saroufim-etal-2018-language
%X Data annotation is a critical step to train a text model but it is tedious, expensive and time-consuming. We present a language independent method to train a sentiment polarity model with limited amount of manually-labeled data. Word embeddings such as Word2Vec are efficient at incorporating semantic and syntactic properties of words, yielding good results for document classification. However, these embeddings might map words with opposite polarities, to vectors close to each other. We train Sentiment Specific Word Embeddings (SSWE) on top of an unsupervised Word2Vec model, using either Recurrent Neural Networks (RNN) or Convolutional Neural Networks (CNN) on data auto-labeled as “Positive” or “Negative”. For this task, we rely on the universality of emojis and emoticons to auto-label a large number of French tweets using a small set of positive and negative emojis and emoticons. Finally, we apply a transfer learning approach to refine the network weights with a small-size manually-labeled training data set. Experiments are conducted to evaluate the performance of this approach on French sentiment classification using benchmark data sets from SemEval 2016 competition. We were able to achieve a performance improvement by using SSWE over Word2Vec. We also used a graph-based approach for label propagation to auto-generate a sentiment lexicon.
%R 10.18653/v1/W18-6204
%U https://aclanthology.org/W18-6204
%U https://doi.org/10.18653/v1/W18-6204
%P 14-23
Markdown (Informal)
[Language Independent Sentiment Analysis with Sentiment-Specific Word Embeddings](https://aclanthology.org/W18-6204) (Saroufim et al., WASSA 2018)
ACL