@inproceedings{michel-ravichander-rijhwani:2017:RepL4NLP,
  author    = {Michel, Paul and Ravichander, Abhilasha and Rijhwani, Shruti},
  title     = {Does the Geometry of Word Embeddings Help Document Classification? A Case Study on Persistent Homology-Based Representations},
  booktitle = {Proceedings of the 2nd Workshop on Representation Learning for {NLP}},
  month     = aug,
  year      = {2017},
  address   = {Vancouver, Canada},
  publisher = {Association for Computational Linguistics},
  pages     = {235--240},
  abstract  = {We investigate the pertinence of methods from algebraic topology for text data
    analysis. These methods enable the development of mathematically-principled
    isometric-invariant mappings from a set of vectors to a document embedding,
    which is stable with respect to the geometry of the document in the selected
    metric space.
    In this work, we evaluate the utility of these topology-based document
    representations in traditional NLP tasks, specifically document clustering and
    sentiment classification.
    We find that the embeddings do not benefit text analysis. In fact, performance
    is worse than simple techniques like tf-idf, indicating that the geometry of
    the document does not provide enough variability for classification on the
    basis of topic or sentiment in the chosen datasets.},
  url       = {http://www.aclweb.org/anthology/W17-2628}
}

