% Sennrich (2017), EACL 2017 short paper presenting the LingEval97 contrastive
% translation pairs. Braces in `title` protect words that must keep their
% capitalisation under sentence-casing styles; `month` uses the standard macro.
@InProceedings{sennrich:2017:EACLshort,
  author    = {Sennrich, Rico},
  title     = {How Grammatical is Character-level Neural Machine Translation? {Assessing} {MT} Quality with Contrastive Translation Pairs},
  booktitle = {Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers},
  month     = apr,
  year      = {2017},
  address   = {Valencia, Spain},
  publisher = {Association for Computational Linguistics},
  pages     = {376--382},
  abstract  = {Analysing translation quality in regards to specific linguistic phenomena has
               historically been difficult and time-consuming. Neural machine translation has
               the attractive property that it can produce scores for arbitrary translations,
               and we propose a novel method to assess how well NMT systems model specific
               linguistic phenomena such as agreement over long distances, the production of
               novel words, and the faithful translation of polarity. The core idea is that we
               measure whether a reference translation is more probable under a NMT model than
               a contrastive translation which introduces a specific type of error. We present
               LingEval97, a large-scale data set of 97000 contrastive translation pairs based
               on the WMT English->German translation task, with errors automatically created
               with simple rules. We report results for a number of systems, and find that
               recently introduced character-level NMT systems perform better at
               transliteration than models with byte-pair encoding (BPE) segmentation, but
               perform more poorly at morphosyntactic agreement, and translating discontiguous
               units of meaning.},
  url       = {http://www.aclweb.org/anthology/E17-2060}
}