% Conference paper by Zhu, Klabjan and Bless (2017), pages 947--956 of the
% Volume 1 (Long Papers) proceedings named in the booktitle field below.
% The month field uses the standard three-letter macro so that the
% bibliography style controls how it is printed.
@InProceedings{zhu-klabjan-bless:2017:I17-1,
  author    = {Zhu, Xiaofeng and Klabjan, Diego and Bless, Patrick},
  title     = {Semantic Document Distance Measures and Unsupervised Document Revision Detection},
  booktitle = {Proceedings of the Eighth International Joint Conference on Natural Language Processing (Volume 1: Long Papers)},
  month     = nov,
  year      = {2017},
  address   = {Taipei, Taiwan},
  publisher = {Asian Federation of Natural Language Processing},
  pages     = {947--956},
  abstract  = {In this paper, we model the document revision detection problem as a minimum
               cost branching problem that relies on computing document distances.
               Furthermore, we propose two new document distance measures, word vector-based
               Dynamic Time Warping (wDTW) and word vector-based Tree Edit Distance (wTED).
               Our revision detection system is designed for a large scale corpus and
               implemented in Apache Spark. We demonstrate that our system can more precisely
               detect revisions than state-of-the-art methods by utilizing the Wikipedia
               revision dumps and simulated data sets.},
  url       = {http://www.aclweb.org/anthology/I17-1095},
}

