@inproceedings{laureano-de-leon-etal-2020-cs,
title = "{CS}-Embed at {S}em{E}val-2020 Task 9: The Effectiveness of Code-switched Word Embeddings for Sentiment Analysis",
author = "Laureano De Leon, Frances Adriana and
Gu{\'e}niat, Florimond and
Tayyar Madabushi, Harish",
editor = "Herbelot, Aurelie and
Zhu, Xiaodan and
Palmer, Alexis and
Schneider, Nathan and
May, Jonathan and
Shutova, Ekaterina",
booktitle = "Proceedings of the Fourteenth Workshop on Semantic Evaluation",
month = dec,
year = "2020",
address = "Barcelona (online)",
publisher = "International Committee for Computational Linguistics",
url = "https://aclanthology.org/2020.semeval-1.117",
doi = "10.18653/v1/2020.semeval-1.117",
pages = "922--927",
abstract = "The growing popularity and applications of sentiment analysis of social media posts has naturally led to sentiment analysis of posts written in multiple languages, a practice known as code-switching. While recent research into code-switched posts has focused on the use of multilingual word embeddings, these embeddings were not trained on code-switched data. In this work, we present word-embeddings trained on code-switched tweets, specifically those that make use of Spanish and English, known as Spanglish. We explore the embedding space to discover how they capture the meanings of words in both languages. We test the effectiveness of these embeddings by participating in SemEval 2020 Task 9: \textit{Sentiment Analysis on Code-Mixed Social Media Text}. We utilised them to train a sentiment classifier that achieves an F-1 score of 0.722. This is higher than the baseline for the competition of 0.656, with our team (codalab username francesita) ranking 14 out of 29 participating teams, beating the baseline.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="laureano-de-leon-etal-2020-cs">
<titleInfo>
<title>CS-Embed at SemEval-2020 Task 9: The Effectiveness of Code-switched Word Embeddings for Sentiment Analysis</title>
</titleInfo>
<name type="personal">
<namePart type="given">Frances</namePart>
<namePart type="given">Adriana</namePart>
<namePart type="family">Laureano De Leon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Florimond</namePart>
<namePart type="family">Guéniat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Harish</namePart>
<namePart type="family">Tayyar Madabushi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fourteenth Workshop on Semantic Evaluation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Aurelie</namePart>
<namePart type="family">Herbelot</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaodan</namePart>
<namePart type="family">Zhu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexis</namePart>
<namePart type="family">Palmer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nathan</namePart>
<namePart type="family">Schneider</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jonathan</namePart>
<namePart type="family">May</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>International Committee for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Barcelona (online)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The growing popularity and applications of sentiment analysis of social media posts has naturally led to sentiment analysis of posts written in multiple languages, a practice known as code-switching. While recent research into code-switched posts has focused on the use of multilingual word embeddings, these embeddings were not trained on code-switched data. In this work, we present word-embeddings trained on code-switched tweets, specifically those that make use of Spanish and English, known as Spanglish. We explore the embedding space to discover how they capture the meanings of words in both languages. We test the effectiveness of these embeddings by participating in SemEval 2020 Task 9: Sentiment Analysis on Code-Mixed Social Media Text. We utilised them to train a sentiment classifier that achieves an F-1 score of 0.722. This is higher than the baseline for the competition of 0.656, with our team (codalab username francesita) ranking 14 out of 29 participating teams, beating the baseline.</abstract>
<identifier type="citekey">laureano-de-leon-etal-2020-cs</identifier>
<identifier type="doi">10.18653/v1/2020.semeval-1.117</identifier>
<location>
<url>https://aclanthology.org/2020.semeval-1.117</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>922</start>
<end>927</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T CS-Embed at SemEval-2020 Task 9: The Effectiveness of Code-switched Word Embeddings for Sentiment Analysis
%A Laureano De Leon, Frances Adriana
%A Guéniat, Florimond
%A Tayyar Madabushi, Harish
%Y Herbelot, Aurelie
%Y Zhu, Xiaodan
%Y Palmer, Alexis
%Y Schneider, Nathan
%Y May, Jonathan
%Y Shutova, Ekaterina
%S Proceedings of the Fourteenth Workshop on Semantic Evaluation
%D 2020
%8 December
%I International Committee for Computational Linguistics
%C Barcelona (online)
%F laureano-de-leon-etal-2020-cs
%X The growing popularity and applications of sentiment analysis of social media posts has naturally led to sentiment analysis of posts written in multiple languages, a practice known as code-switching. While recent research into code-switched posts has focused on the use of multilingual word embeddings, these embeddings were not trained on code-switched data. In this work, we present word-embeddings trained on code-switched tweets, specifically those that make use of Spanish and English, known as Spanglish. We explore the embedding space to discover how they capture the meanings of words in both languages. We test the effectiveness of these embeddings by participating in SemEval 2020 Task 9: Sentiment Analysis on Code-Mixed Social Media Text. We utilised them to train a sentiment classifier that achieves an F-1 score of 0.722. This is higher than the baseline for the competition of 0.656, with our team (codalab username francesita) ranking 14 out of 29 participating teams, beating the baseline.
%R 10.18653/v1/2020.semeval-1.117
%U https://aclanthology.org/2020.semeval-1.117
%U https://doi.org/10.18653/v1/2020.semeval-1.117
%P 922-927
Markdown (Informal)
[CS-Embed at SemEval-2020 Task 9: The Effectiveness of Code-switched Word Embeddings for Sentiment Analysis](https://aclanthology.org/2020.semeval-1.117) (Laureano De Leon et al., SemEval 2020)
ACL