% EACL 2017 long paper introducing the ShotgunWSD algorithm.
% Braces in the title protect case-sensitive tokens from style recasing;
% the month uses the predefined macro so each style can format it.
@InProceedings{butnaru-ionescu-hristea:2017:EACLlong,
  author    = {Butnaru, Andrei and Ionescu, Radu Tudor and Hristea, Florentina},
  title     = {{ShotgunWSD}: An unsupervised algorithm for global word sense disambiguation inspired by {DNA} sequencing},
  booktitle = {Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 1, Long Papers},
  month     = apr,
  year      = {2017},
  address   = {Valencia, Spain},
  publisher = {Association for Computational Linguistics},
  pages     = {916--926},
  abstract  = {In this paper, we present a novel unsupervised algorithm for word sense
	disambiguation (WSD) at the document level. Our algorithm is inspired by a
	widely-used approach in the field of genetics for whole genome sequencing,
	known as the Shotgun sequencing technique. The proposed WSD algorithm is based
	on three main steps. First, a brute-force WSD algorithm is applied to short
	context windows (up to 10 words) selected from the document in order to
	generate a short list of likely sense configurations for each window. In the
	second step, these local sense configurations are assembled into longer
	composite configurations based on suffix and prefix matching. The resulted
	configurations are ranked by their length, and the sense of each word is chosen
	based on a voting scheme that considers only the top k configurations in which
	the word appears. We compare our algorithm with other state-of-the-art
	unsupervised WSD algorithms and demonstrate better performance, sometimes by a
	very large margin. We also show that our algorithm can yield better performance
	than the Most Common Sense (MCS) baseline on one data set. Moreover, our
	algorithm has a very small number of parameters, is robust to parameter tuning,
	and, unlike other bio-inspired methods, it gives a deterministic solution (it
	does not involve random choices).},
  url       = {http://www.aclweb.org/anthology/E17-1086}
}

