% Conference paper by Eduard Barbu in the RANLP 2017 proceedings (pages 71--77).
% The url field is the doi.org resolver form of the doi field.
@inproceedings{barbu-2017-ensembles,
    author    = {Barbu, Eduard},
    title     = {Ensembles of Classifiers for Cleaning Web Parallel Corpora and Translation Memories},
    editor    = {Mitkov, Ruslan and Angelova, Galia},
    booktitle = {Proceedings of the International Conference Recent Advances in Natural Language Processing, {RANLP} 2017},
    month     = sep,
    year      = {2017},
    address   = {Varna, Bulgaria},
    publisher = {INCOMA Ltd.},
    pages     = {71--77},
    doi       = {10.26615/978-954-452-049-6_011},
    url       = {https://doi.org/10.26615/978-954-452-049-6_011},
    abstract  = {The last years witnessed an increasing interest in the automatic methods for spotting false translation units in translation memories. This problem presents a great interest to industry as there are many translation memories that contain errors. A closely related line of research deals with identifying sentences that do not align in the parallel corpora mined from the web. The task of spotting false translations is modeled as a binary classification problem. It is known that in certain conditions the ensembles of classifiers improve over the performance of the individual members. In this paper we benchmark the most popular ensemble of classifiers: Majority Voting, Bagging, Stacking and Ada Boost at the task of spotting false translation units for translation memories and parallel web corpora. We want to know if for this specific problem any ensemble technique improves the performance of the individual classifiers and if there is a difference between the data in translation memories and parallel web corpora with respect to this task.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for citekey barbu-2017-ensembles: the paper itself, plus its host proceedings in the relatedItem element. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="barbu-2017-ensembles">
    <titleInfo>
      <title>Ensembles of Classifiers for Cleaning Web Parallel Corpora and Translation Memories</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Eduard</namePart>
      <namePart type="family">Barbu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2017-09</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the International Conference Recent Advances in Natural Language Processing, RANLP 2017</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Ruslan</namePart>
        <namePart type="family">Mitkov</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Galia</namePart>
        <namePart type="family">Angelova</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>INCOMA Ltd.</publisher>
        <place>
          <placeTerm type="text">Varna, Bulgaria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>The last years witnessed an increasing interest in the automatic methods for spotting false translation units in translation memories. This problem presents a great interest to industry as there are many translation memories that contain errors. A closely related line of research deals with identifying sentences that do not align in the parallel corpora mined from the web. The task of spotting false translations is modeled as a binary classification problem. It is known that in certain conditions the ensembles of classifiers improve over the performance of the individual members. In this paper we benchmark the most popular ensemble of classifiers: Majority Voting, Bagging, Stacking and Ada Boost at the task of spotting false translation units for translation memories and parallel web corpora. We want to know if for this specific problem any ensemble technique improves the performance of the individual classifiers and if there is a difference between the data in translation memories and parallel web corpora with respect to this task.</abstract>
    <identifier type="citekey">barbu-2017-ensembles</identifier>
    <identifier type="doi">10.26615/978-954-452-049-6_011</identifier>
    <part>
      <date>2017-09</date>
      <extent unit="page">
        <start>71</start>
        <end>77</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Ensembles of Classifiers for Cleaning Web Parallel Corpora and Translation Memories
%A Barbu, Eduard
%Y Mitkov, Ruslan
%Y Angelova, Galia
%S Proceedings of the International Conference Recent Advances in Natural Language Processing, RANLP 2017
%D 2017
%8 September
%I INCOMA Ltd.
%C Varna, Bulgaria
%F barbu-2017-ensembles
%X The last years witnessed an increasing interest in the automatic methods for spotting false translation units in translation memories. This problem presents a great interest to industry as there are many translation memories that contain errors. A closely related line of research deals with identifying sentences that do not align in the parallel corpora mined from the web. The task of spotting false translations is modeled as a binary classification problem. It is known that in certain conditions the ensembles of classifiers improve over the performance of the individual members. In this paper we benchmark the most popular ensemble of classifiers: Majority Voting, Bagging, Stacking and Ada Boost at the task of spotting false translation units for translation memories and parallel web corpora. We want to know if for this specific problem any ensemble technique improves the performance of the individual classifiers and if there is a difference between the data in translation memories and parallel web corpora with respect to this task.
%R 10.26615/978-954-452-049-6_011
%U https://doi.org/10.26615/978-954-452-049-6_011
%P 71-77
Markdown (Informal)
[Ensembles of Classifiers for Cleaning Web Parallel Corpora and Translation Memories](https://doi.org/10.26615/978-954-452-049-6_011) (Barbu, RANLP 2017)
ACL