@inproceedings{singh-lefever-2020-sentiment,
title = "Sentiment Analysis for {H}inglish Code-mixed Tweets by means of Cross-lingual Word Embeddings",
author = "Singh, Pranaydeep and
Lefever, Els",
editor = "Solorio, Thamar and
Choudhury, Monojit and
Bali, Kalika and
Sitaram, Sunayana and
Das, Amitava and
Diab, Mona",
booktitle = "Proceedings of the 4th Workshop on Computational Approaches to Code Switching",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2020.calcs-1.6",
pages = "45--51",
abstract = "This paper investigates the use of unsupervised cross-lingual embeddings for solving the problem of code-mixed social media text understanding. We specifically investigate the use of these embeddings for a sentiment analysis task for Hinglish Tweets, viz. English combined with (transliterated) Hindi. In a first step, baseline models, initialized with monolingual embeddings obtained from large collections of tweets in English and code-mixed Hinglish, were trained. In a second step, two systems using cross-lingual embeddings were researched, being (1) a supervised classifier and (2) a transfer learning approach trained on English sentiment data and evaluated on code-mixed data. We demonstrate that incorporating cross-lingual embeddings improves the results (F1-score of 0.635 versus a monolingual baseline of 0.616), without any parallel data required to train the cross-lingual embeddings. In addition, the results show that the cross-lingual embeddings not only improve the results in a fully supervised setting, but they can also be used as a base for distant supervision, by training a sentiment model in one of the source languages and evaluating on the other language projected in the same space. The transfer learning experiments result in an F1-score of 0.556, which is almost on par with the supervised settings and speak to the robustness of the cross-lingual embeddings approach.",
language = "English",
ISBN = "979-10-95546-66-5",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="singh-lefever-2020-sentiment">
<titleInfo>
<title>Sentiment Analysis for Hinglish Code-mixed Tweets by means of Cross-lingual Word Embeddings</title>
</titleInfo>
<name type="personal">
<namePart type="given">Pranaydeep</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Els</namePart>
<namePart type="family">Lefever</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 4th Workshop on Computational Approaches to Code Switching</title>
</titleInfo>
<name type="personal">
<namePart type="given">Thamar</namePart>
<namePart type="family">Solorio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Monojit</namePart>
<namePart type="family">Choudhury</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kalika</namePart>
<namePart type="family">Bali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sunayana</namePart>
<namePart type="family">Sitaram</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amitava</namePart>
<namePart type="family">Das</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mona</namePart>
<namePart type="family">Diab</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-66-5</identifier>
</relatedItem>
<abstract>This paper investigates the use of unsupervised cross-lingual embeddings for solving the problem of code-mixed social media text understanding. We specifically investigate the use of these embeddings for a sentiment analysis task for Hinglish Tweets, viz. English combined with (transliterated) Hindi. In a first step, baseline models, initialized with monolingual embeddings obtained from large collections of tweets in English and code-mixed Hinglish, were trained. In a second step, two systems using cross-lingual embeddings were researched, being (1) a supervised classifier and (2) a transfer learning approach trained on English sentiment data and evaluated on code-mixed data. We demonstrate that incorporating cross-lingual embeddings improves the results (F1-score of 0.635 versus a monolingual baseline of 0.616), without any parallel data required to train the cross-lingual embeddings. In addition, the results show that the cross-lingual embeddings not only improve the results in a fully supervised setting, but they can also be used as a base for distant supervision, by training a sentiment model in one of the source languages and evaluating on the other language projected in the same space. The transfer learning experiments result in an F1-score of 0.556, which is almost on par with the supervised settings and speak to the robustness of the cross-lingual embeddings approach.</abstract>
<identifier type="citekey">singh-lefever-2020-sentiment</identifier>
<location>
<url>https://aclanthology.org/2020.calcs-1.6</url>
</location>
<part>
<date>2020-05</date>
<extent unit="page">
<start>45</start>
<end>51</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Sentiment Analysis for Hinglish Code-mixed Tweets by means of Cross-lingual Word Embeddings
%A Singh, Pranaydeep
%A Lefever, Els
%Y Solorio, Thamar
%Y Choudhury, Monojit
%Y Bali, Kalika
%Y Sitaram, Sunayana
%Y Das, Amitava
%Y Diab, Mona
%S Proceedings of the 4th Workshop on Computational Approaches to Code Switching
%D 2020
%8 May
%I European Language Resources Association
%C Marseille, France
%@ 979-10-95546-66-5
%G English
%F singh-lefever-2020-sentiment
%X This paper investigates the use of unsupervised cross-lingual embeddings for solving the problem of code-mixed social media text understanding. We specifically investigate the use of these embeddings for a sentiment analysis task for Hinglish Tweets, viz. English combined with (transliterated) Hindi. In a first step, baseline models, initialized with monolingual embeddings obtained from large collections of tweets in English and code-mixed Hinglish, were trained. In a second step, two systems using cross-lingual embeddings were researched, being (1) a supervised classifier and (2) a transfer learning approach trained on English sentiment data and evaluated on code-mixed data. We demonstrate that incorporating cross-lingual embeddings improves the results (F1-score of 0.635 versus a monolingual baseline of 0.616), without any parallel data required to train the cross-lingual embeddings. In addition, the results show that the cross-lingual embeddings not only improve the results in a fully supervised setting, but they can also be used as a base for distant supervision, by training a sentiment model in one of the source languages and evaluating on the other language projected in the same space. The transfer learning experiments result in an F1-score of 0.556, which is almost on par with the supervised settings and speak to the robustness of the cross-lingual embeddings approach.
%U https://aclanthology.org/2020.calcs-1.6
%P 45-51
Markdown (Informal)
[Sentiment Analysis for Hinglish Code-mixed Tweets by means of Cross-lingual Word Embeddings](https://aclanthology.org/2020.calcs-1.6) (Singh & Lefever, CALCS 2020)
ACL