% EACL 2017 long paper (ACL Anthology E17-1093).
@InProceedings{sato-EtAl:2017:EACLlong,
  author    = {Sato, Motoki and
               Brockmeier, Austin J. and
               Kontonatsios, Georgios and
               Mu, Tingting and
               Goulermas, John Y. and
               Tsujii, Jun'ichi and
               Ananiadou, Sophia},
  title     = {Distributed Document and Phrase Co-embeddings for Descriptive Clustering},
  booktitle = {Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 1, Long Papers},
  month     = apr,
  year      = {2017},
  address   = {Valencia, Spain},
  publisher = {Association for Computational Linguistics},
  pages     = {991--1001},
  abstract  = {Descriptive document clustering aims to automatically discover groups of
               semantically related documents and to assign a meaningful label to characterise
               the content of each cluster. In this paper, we present a descriptive clustering
               approach that employs a distributed representation model, namely the paragraph
               vector model, to capture semantic similarities between documents and phrases.
               The proposed method uses a joint representation of phrases and documents
               (i.e., a co-embedding) to automatically select a descriptive phrase that best
               represents each document cluster. We evaluate our method by comparing its
               performance to an existing state-of-the-art descriptive clustering method that
               also uses co-embedding but relies on a bag-of-words representation. Results
               obtained on benchmark datasets demonstrate that the paragraph vector-based
               method obtains superior performance over the existing approach in both
               identifying clusters and assigning appropriate descriptive labels to them.},
  url       = {http://www.aclweb.org/anthology/E17-1093}
}

