% COLING 2016 technical paper on semantic overfitting in the evaluation of text disambiguation.
@InProceedings{ilievski-postma-vossen:2016:COLING,
  author    = {Ilievski, Filip and Postma, Marten and Vossen, Piek},
  title     = {Semantic overfitting: what `world' do we consider when evaluating disambiguation of text?},
  booktitle = {Proceedings of {COLING} 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = dec,
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1180--1191},
  abstract  = {Semantic text processing faces the challenge of defining the relation between
	lexical expressions and the world to which they make reference within a period
	of time. It is unclear whether the current test sets used to evaluate
	disambiguation tasks are representative for the full complexity considering
	this time-anchored relation, resulting in semantic overfitting to a specific
	period and the frequent phenomena within. We conceptualize and formalize a set
	of metrics which evaluate this complexity of datasets. We provide evidence for
	their applicability on five different disambiguation tasks. To challenge
	semantic overfitting of disambiguation systems, we propose a time-based,
	metric-aware method for developing datasets in a systematic and semi-automated
	manner, as well as an event-based QA task.},
  url       = {https://aclanthology.org/C16-1112/},
}

