@InProceedings{stanojevic-simaan:2017:Short,
  author    = {Stanojevi{\'c}, Milo{\v{s}} and Sima'an, Khalil},
  title     = {Alternative Objective Functions for Training {MT} Evaluation Metrics},
  booktitle = {Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)},
  month     = jul,
  year      = {2017},
  address   = {Vancouver, Canada},
  publisher = {Association for Computational Linguistics},
  pages     = {20--25},
  abstract  = {MT evaluation metrics are tested for correlation with human judgments either at
	the sentence- or the corpus-level. Trained metrics ignore corpus-level
	judgments and are trained for high sentence-level correlation only. We show
	that training only for one objective (sentence or corpus level), can not only
	harm the performance on the other objective, but it can also be suboptimal for
	the objective being optimized. To this end we present a metric trained for
	corpus-level and show empirical comparison against a metric trained for
	sentence-level exemplifying how their performance may vary per language pair,
	type and level of judgment. Subsequently we propose a model trained to optimize
	both objectives simultaneously and show that it is far more stable than--and on
	average outperforms--both models on both objectives.},
  url       = {http://aclweb.org/anthology/P17-2004}
}

