% RANLP 2017 conference paper: predicting multiword expressions from gaze data.
@InProceedings{rohanian-EtAl:2017:RANLP,
  author    = {Rohanian, Omid and Taslimipoor, Shiva and Yaneva, Victoria and Ha, Le An},
  title     = {Using Gaze Data to Predict Multiword Expressions},
  booktitle = {Proceedings of the International Conference Recent Advances in Natural Language Processing, {RANLP} 2017},
  month     = sep,
  year      = {2017},
  address   = {Varna, Bulgaria},
  publisher = {INCOMA Ltd.},
  pages     = {601--609},
  abstract  = {In recent years gaze data has been increasingly used to improve and evaluate
    NLP models due to the fact that it carries information about the cognitive
    processing of linguistic phenomena. In this paper we conduct a preliminary
    study towards the automatic identification of multiword expressions based on
    gaze features from native and non-native speakers of English. We report
    comparisons between a part-of-speech (POS) and frequency baseline to: i) a
    prediction model based solely on gaze data and ii) a combined model of gaze
    data, POS and frequency. In spite of the challenging nature of the task, best
    performance was achieved by the latter. Furthermore, we explore how the type
    of gaze data (from native versus non-native speakers) affects the prediction,
    showing that data from the two groups is discriminative to an equal degree
    for the task. Finally, we show that late processing measures are more
    predictive than early ones, which is in line with previous research on idioms
    and other formulaic structures.},
  doi       = {10.26615/978-954-452-049-6_078},
  url       = {https://doi.org/10.26615/978-954-452-049-6_078},
}

