% Findings paper of the WMT 2020 shared task on parallel corpus filtering and alignment.
@inproceedings{koehn-etal-2020-findings,
    title = "Findings of the {WMT} 2020 Shared Task on Parallel Corpus Filtering and Alignment",
    author = "Koehn, Philipp  and
      Chaudhary, Vishrav  and
      El-Kishky, Ahmed  and
      Goyal, Naman  and
      Chen, Peng-Jen  and
      Guzm{\'a}n, Francisco",
    booktitle = "Proceedings of the Fifth Conference on Machine Translation",
    month = nov,
    year = "2020",
    address = "Online",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2020.wmt-1.78",
    pages = "726--742",
    abstract = "Following two preceding WMT Shared Task on Parallel Corpus Filtering (Koehn et al., 2018, 2019), we posed again the challenge of assigning sentence-level quality scores for very noisy corpora of sentence pairs crawled from the web, with the goal of sub-selecting the highest-quality data to be used to train machine translation systems. This year, the task tackled the low resource condition of Pashto{--}English and Khmer{--}English and also included the challenge of sentence alignment from document pairs.",
}
