@inproceedings{zhao-schutze-2019-multilingual,
title = "A Multilingual {BPE} Embedding Space for Universal Sentiment Lexicon Induction",
author = {Zhao, Mengjie and
Sch{\"u}tze, Hinrich},
editor = "Korhonen, Anna and
Traum, David and
M{\`a}rquez, Llu{\'i}s",
booktitle = "Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics",
month = jul,
year = "2019",
address = "Florence, Italy",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/P19-1341/",
doi = "10.18653/v1/P19-1341",
pages = "3506--3517",
abstract = "We present a new method for sentiment lexicon induction that is designed to be applicable to the entire range of typological diversity of the world's languages. We evaluate our method on Parallel Bible Corpus+ (PBC+), a parallel corpus of 1593 languages. The key idea is to use Byte Pair Encodings (BPEs) as basic units for multilingual embeddings. Through zero-shot transfer from English sentiment, we learn a seed lexicon for each language in the domain of PBC+. Through domain adaptation, we then generalize the domain-specific lexicon to a general one. We show {--} across typologically diverse languages in PBC+ {--} good quality of seed and general-domain sentiment lexicons by intrinsic and extrinsic and by automatic and human evaluation. We make freely available our code, seed sentiment lexicons for all 1593 languages and induced general-domain sentiment lexicons for 200 languages."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhao-schutze-2019-multilingual">
<titleInfo>
<title>A Multilingual BPE Embedding Space for Universal Sentiment Lexicon Induction</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mengjie</namePart>
<namePart type="family">Zhao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hinrich</namePart>
<namePart type="family">Schütze</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Korhonen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Traum</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lluís</namePart>
<namePart type="family">Màrquez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Florence, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present a new method for sentiment lexicon induction that is designed to be applicable to the entire range of typological diversity of the world’s languages. We evaluate our method on Parallel Bible Corpus+ (PBC+), a parallel corpus of 1593 languages. The key idea is to use Byte Pair Encodings (BPEs) as basic units for multilingual embeddings. Through zero-shot transfer from English sentiment, we learn a seed lexicon for each language in the domain of PBC+. Through domain adaptation, we then generalize the domain-specific lexicon to a general one. We show – across typologically diverse languages in PBC+ – good quality of seed and general-domain sentiment lexicons by intrinsic and extrinsic and by automatic and human evaluation. We make freely available our code, seed sentiment lexicons for all 1593 languages and induced general-domain sentiment lexicons for 200 languages.</abstract>
<identifier type="citekey">zhao-schutze-2019-multilingual</identifier>
<identifier type="doi">10.18653/v1/P19-1341</identifier>
<location>
<url>https://aclanthology.org/P19-1341/</url>
</location>
<part>
<date>2019-07</date>
<extent unit="page">
<start>3506</start>
<end>3517</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Multilingual BPE Embedding Space for Universal Sentiment Lexicon Induction
%A Zhao, Mengjie
%A Schütze, Hinrich
%Y Korhonen, Anna
%Y Traum, David
%Y Màrquez, Lluís
%S Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics
%D 2019
%8 July
%I Association for Computational Linguistics
%C Florence, Italy
%F zhao-schutze-2019-multilingual
%X We present a new method for sentiment lexicon induction that is designed to be applicable to the entire range of typological diversity of the world’s languages. We evaluate our method on Parallel Bible Corpus+ (PBC+), a parallel corpus of 1593 languages. The key idea is to use Byte Pair Encodings (BPEs) as basic units for multilingual embeddings. Through zero-shot transfer from English sentiment, we learn a seed lexicon for each language in the domain of PBC+. Through domain adaptation, we then generalize the domain-specific lexicon to a general one. We show – across typologically diverse languages in PBC+ – good quality of seed and general-domain sentiment lexicons by intrinsic and extrinsic and by automatic and human evaluation. We make freely available our code, seed sentiment lexicons for all 1593 languages and induced general-domain sentiment lexicons for 200 languages.
%R 10.18653/v1/P19-1341
%U https://aclanthology.org/P19-1341/
%U https://doi.org/10.18653/v1/P19-1341
%P 3506-3517
Markdown (Informal)
[A Multilingual BPE Embedding Space for Universal Sentiment Lexicon Induction](https://aclanthology.org/P19-1341/) (Zhao & Schütze, ACL 2019)
ACL