% Barbu (2017), RANLP 2017 proceedings paper. The DOI is stored bare in `doi`;
% `url` keeps the resolver link for styles that only understand `url`.
@InProceedings{barbu:2017:RANLP,
  author    = {Barbu, Eduard},
  title     = {Ensembles of Classifiers for Cleaning Web Parallel Corpora and Translation Memories},
  booktitle = {Proceedings of the International Conference Recent Advances in Natural Language Processing, RANLP 2017},
  month     = sep,
  year      = {2017},
  address   = {Varna, Bulgaria},
  publisher = {INCOMA Ltd.},
  pages     = {71--77},
  abstract  = {The last years witnessed an increasing interest in the automatic methods for
               spotting false translation units in translation memories. This problem presents
               a great interest to industry as there are many translation memories that
               contain errors. A closely related line of research deals with identifying
               sentences that do not align in the parallel corpora mined from the web. The
               task of spotting false translations is modeled as a binary classification
               problem. It is known that in certain conditions the ensembles of classifiers
               improve over the performance of the individual members. In this paper we
               benchmark the most popular ensemble of classifiers: Majority Voting, Bagging,
               Stacking and Ada Boost at the task of spotting false translation units for
               translation memories and parallel web corpora. We want to know if for this
               specific problem any ensemble technique improves the performance of the
               individual classifiers and if there is a difference between the data in
               translation memories and parallel web corpora with respect to this task.},
  doi       = {10.26615/978-954-452-049-6_011},
  url       = {https://doi.org/10.26615/978-954-452-049-6_011},
}

