@InProceedings{sperber-EtAl:2016:COLING,
  author    = {Sperber, Matthias  and  Neubig, Graham  and  Niehues, Jan  and  St\"{u}ker, Sebastian  and  Waibel, Alex},
  title     = {Lightly Supervised Quality Estimation},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3103--3113},
  abstract  = {Evaluating the quality of output from language processing systems such as
	machine translation or speech recognition is an essential step in ensuring that
	they are sufficient for practical use. However, depending on the practical
	requirements, evaluation approaches can differ considerably. Often, reference-based
	evaluation measures (such as BLEU or WER) are appealing because they are cheap
	and allow rapid quantitative comparison. On the other hand, practitioners often
	focus on manual evaluation because they must deal with frequently changing
	domains and quality standards requested by customers, for which reference-based
	evaluation is insufficient or not possible due to missing in-domain reference
	data (Harris et al., 2016). In this paper, we attempt to bridge this gap by
	proposing a framework for lightly supervised quality estimation. We collect
	manually annotated scores for a small number of segments in a test corpus or
	document, and combine them with automatically predicted quality scores for the
	remaining segments to predict an overall quality estimate. An evaluation shows
	that our framework estimates quality more reliably than fully automatic
	quality estimation approaches, while keeping annotation effort low by not
	requiring full references to be available for the particular domain.},
  url       = {http://aclweb.org/anthology/C16-1292}
}
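% Note: the abstract describes combining manual scores on a small annotated
% subset with automatically predicted scores on the remaining segments to
% obtain an overall quality estimate. As an illustration only (the paper's
% actual combination method is not specified in the abstract), one simple
% segment-level instantiation would be a pooled average:
%
%   \hat{Q} = \frac{1}{|S|} \Big( \sum_{i \in A} q_i + \sum_{i \in S \setminus A} \tilde{q}_i \Big)
%
% where S is the set of all segments in the test corpus, A \subseteq S is the
% manually annotated subset, q_i is the human-assigned score for segment i,
% and \tilde{q}_i is the automatic quality-estimation prediction. All symbols
% here are illustrative assumptions, not notation from the paper.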

