@inproceedings{kameswara-sarma-2018-learning,
    title = "Learning Word Embeddings for Data Sparse and Sentiment Rich Data Sets",
    author = "Kameswara Sarma, Prathusha",
    editor = "Cordeiro, Silvio Ricardo and
      Oraby, Shereen and
      Pavalanathan, Umashanthi and
      Rim, Kyeongmin",
    booktitle = "Proceedings of the 2018 Conference of the North {A}merican Chapter of the Association for Computational Linguistics: Student Research Workshop",
    month = jun,
    year = "2018",
    address = "New Orleans, Louisiana, USA",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/N18-4007",
    doi = "10.18653/v1/N18-4007",
    pages = "46--53",
    abstract = "This research proposal describes two algorithms that are aimed at learning word embeddings for data sparse and sentiment rich data sets. The goal is to use word embeddings adapted for domain specific data sets in downstream applications such as sentiment classification. The first approach learns word embeddings in a supervised fashion via SWESA (Supervised Word Embeddings for Sentiment Analysis), an algorithm for sentiment analysis on data sets that are of modest size. SWESA leverages document labels to jointly learn polarity-aware word embeddings and a classifier to classify unseen documents. In the second approach domain adapted (DA) word embeddings are learned by exploiting the specificity of domain specific data sets and the breadth of generic word embeddings. The new embeddings are formed by aligning corresponding word vectors using Canonical Correlation Analysis (CCA) or the related nonlinear Kernel CCA. Experimental results on binary sentiment classification tasks using both approaches for standard data sets are presented.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kameswara-sarma-2018-learning">
  <titleInfo>
    <title>Learning Word Embeddings for Data Sparse and Sentiment Rich Data Sets</title>
  </titleInfo>
  <name type="personal">
    <namePart type="given">Prathusha</namePart>
    <namePart type="family">Kameswara Sarma</namePart>
    <role>
      <roleTerm authority="marcrelator" type="text">author</roleTerm>
    </role>
  </name>
  <originInfo>
    <dateIssued>2018-06</dateIssued>
  </originInfo>
  <typeOfResource>text</typeOfResource>
  <relatedItem type="host">
    <titleInfo>
      <title>Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Student Research Workshop</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Silvio</namePart>
      <namePart type="given">Ricardo</namePart>
      <namePart type="family">Cordeiro</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">editor</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Shereen</namePart>
      <namePart type="family">Oraby</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">editor</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Umashanthi</namePart>
      <namePart type="family">Pavalanathan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">editor</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Kyeongmin</namePart>
      <namePart type="family">Rim</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">editor</roleTerm>
      </role>
    </name>
    <originInfo>
      <publisher>Association for Computational Linguistics</publisher>
      <place>
        <placeTerm type="text">New Orleans, Louisiana, USA</placeTerm>
      </place>
    </originInfo>
    <genre authority="marcgt">conference publication</genre>
  </relatedItem>
  <abstract>This research proposal describes two algorithms that are aimed at learning word embeddings for data sparse and sentiment rich data sets. The goal is to use word embeddings adapted for domain specific data sets in downstream applications such as sentiment classification. The first approach learns word embeddings in a supervised fashion via SWESA (Supervised Word Embeddings for Sentiment Analysis), an algorithm for sentiment analysis on data sets that are of modest size. SWESA leverages document labels to jointly learn polarity-aware word embeddings and a classifier to classify unseen documents. In the second approach domain adapted (DA) word embeddings are learned by exploiting the specificity of domain specific data sets and the breadth of generic word embeddings. The new embeddings are formed by aligning corresponding word vectors using Canonical Correlation Analysis (CCA) or the related nonlinear Kernel CCA. Experimental results on binary sentiment classification tasks using both approaches for standard data sets are presented.</abstract>
  <identifier type="citekey">kameswara-sarma-2018-learning</identifier>
  <identifier type="doi">10.18653/v1/N18-4007</identifier>
  <location>
    <url>https://aclanthology.org/N18-4007</url>
  </location>
  <part>
    <date>2018-06</date>
    <extent unit="page">
      <start>46</start>
      <end>53</end>
    </extent>
  </part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Learning Word Embeddings for Data Sparse and Sentiment Rich Data Sets
%A Kameswara Sarma, Prathusha
%Y Cordeiro, Silvio Ricardo
%Y Oraby, Shereen
%Y Pavalanathan, Umashanthi
%Y Rim, Kyeongmin
%S Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Student Research Workshop
%D 2018
%8 June
%I Association for Computational Linguistics
%C New Orleans, Louisiana, USA
%F kameswara-sarma-2018-learning
%X This research proposal describes two algorithms that are aimed at learning word embeddings for data sparse and sentiment rich data sets. The goal is to use word embeddings adapted for domain specific data sets in downstream applications such as sentiment classification. The first approach learns word embeddings in a supervised fashion via SWESA (Supervised Word Embeddings for Sentiment Analysis), an algorithm for sentiment analysis on data sets that are of modest size. SWESA leverages document labels to jointly learn polarity-aware word embeddings and a classifier to classify unseen documents. In the second approach domain adapted (DA) word embeddings are learned by exploiting the specificity of domain specific data sets and the breadth of generic word embeddings. The new embeddings are formed by aligning corresponding word vectors using Canonical Correlation Analysis (CCA) or the related nonlinear Kernel CCA. Experimental results on binary sentiment classification tasks using both approaches for standard data sets are presented.
%R 10.18653/v1/N18-4007
%U https://aclanthology.org/N18-4007
%U https://doi.org/10.18653/v1/N18-4007
%P 46-53
Markdown (Informal)
[Learning Word Embeddings for Data Sparse and Sentiment Rich Data Sets](https://aclanthology.org/N18-4007) (Kameswara Sarma, NAACL 2018)
ACL
Prathusha Kameswara Sarma. 2018. [Learning Word Embeddings for Data Sparse and Sentiment Rich Data Sets](https://aclanthology.org/N18-4007). In *Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Student Research Workshop*, pages 46–53, New Orleans, Louisiana, USA. Association for Computational Linguistics.
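
The abstract's second approach, forming domain-adapted embeddings by aligning generic and domain-specific word vectors with CCA, is concrete enough to sketch. Below is a minimal, hypothetical illustration using scikit-learn's `CCA`: the random matrices stand in for real embedding tables, and averaging the projected views is one plausible combination rule, not necessarily the paper's exact formulation.

```python
import numpy as np
from sklearn.cross_decomposition import CCA

# Stand-in data: in practice these would be generic embeddings (e.g. GloVe)
# and domain-specific embeddings (e.g. word2vec trained on the target corpus),
# with rows aligned on the shared vocabulary. Shapes here are illustrative.
rng = np.random.default_rng(0)
n_words, d_generic, d_domain = 1000, 300, 100
W_generic = rng.standard_normal((n_words, d_generic))
W_domain = rng.standard_normal((n_words, d_domain))

# Project both embedding spaces onto a k-dimensional subspace in which the
# two views of each word are maximally correlated.
cca = CCA(n_components=10, max_iter=1000)
U, V = cca.fit_transform(W_generic, W_domain)

# One plausible way to form the domain-adapted (DA) embedding of a word is
# to average its two projected views; a weighted combination would work too.
W_da = (U + V) / 2.0
print(W_da.shape)  # (1000, 10)
```

The nonlinear variant mentioned in the abstract would replace `CCA` with a kernelized version (Kernel CCA), which scikit-learn does not ship; the alignment-then-combine structure stays the same.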