% Workshop paper from RepL4NLP 2017 (ACL Anthology id W17-2613, per the url field).
% The abstract field is informational only; standard styles do not print it.
@InProceedings{upadhyay-EtAl:2017:RepL4NLP,
  author    = {Upadhyay, Shyam and Chang, Kai-Wei and Taddy, Matt and Kalai, Adam and Zou, James},
  title     = {Beyond Bilingual: Multi-sense Word Embeddings using Multilingual Context},
  booktitle = {Proceedings of the 2nd Workshop on Representation Learning for {NLP}},
  month     = aug,
  year      = {2017},
  address   = {Vancouver, Canada},
  publisher = {Association for Computational Linguistics},
  pages     = {101--110},
  abstract  = {Word embeddings, which represent a word as a point in a vector
               space, have become ubiquitous to several NLP tasks. A recent
               line of work uses bilingual (two languages) corpora to learn a
               different vector for each sense of a word, by exploiting
               crosslingual signals to aid sense identification. We present a
               multi-view Bayesian non-parametric algorithm which improves
               multi-sense word embeddings by (a) using multilingual (i.e.,
               more than two languages) corpora to significantly improve sense
               embeddings beyond what one achieves with bilingual information,
               and (b) uses a principled approach to learn a variable number of
               senses per word, in a data-driven manner. Ours is the first
               approach with the ability to leverage multilingual corpora
               efficiently for multi-sense representation learning. Experiments
               show that multilingual training significantly improves
               performance over monolingual and bilingual training, by allowing
               us to combine different parallel corpora to leverage
               multilingual context. Multilingual training yields comparable
               performance to a state of the art monolingual model trained on
               five times more training data.},
  url       = {http://www.aclweb.org/anthology/W17-2613}
}

