% Hashimoto, Alvarez-Melis and Jaakkola (2016), TACL volume 4, pages 273--286.
% The citation key is the one generated by the journal site export; keep it stable for existing citations.
@article{TACL809,
	author   = {Hashimoto, Tatsunori and Alvarez-Melis, David and Jaakkola, Tommi},
	title    = {Word Embeddings as Metric Recovery in Semantic Spaces},
	journal  = {Transactions of the Association for Computational Linguistics},
	volume   = {4},
	pages    = {273--286},
	year     = {2016},
	issn     = {2307-387X},
	url      = {https://transacl.org/ojs/index.php/tacl/article/view/809},
	abstract = {Continuous word representations have been remarkably useful across NLP tasks but remain poorly understood. We ground word embeddings in semantic spaces studied in the cognitive-psychometric literature, taking these spaces as the primary objects to recover. To this end, we relate log co-occurrences of words in large corpora to semantic similarity assessments and show that co-occurrences are indeed consistent with an Euclidean semantic space hypothesis. Framing word embedding as metric recovery of a semantic space unifies existing word embedding algorithms, ties them to manifold learning, and demonstrates that existing algorithms are consistent metric recovery methods given co-occurrence counts from random walks. Furthermore, we propose a simple, principled, direct metric recovery algorithm that performs on par with the state-of-the-art word embedding and manifold learning methods. Finally, we complement recent focus on analogies by constructing two new inductive reasoning datasets---series completion and classification---and demonstrate that word embeddings can be used to solve them as well.},
}
