% Workshop paper in the HumEval 2022 proceedings (record from the ACL Anthology).
% Names use the unambiguous "Last, First" form; accents are written as BibTeX
% special characters so sorting and label generation treat them as single letters.
@inproceedings{ramirez-sanchez-etal-2022-human,
  title     = {Human evaluation of web-crawled parallel corpora for machine translation},
  author    = {Ram{\'\i}rez-S{\'a}nchez, Gema and
               Ba{\~n}{\'o}n, Marta and
               Zaragoza-Bernabeu, Jaume and
               Ortiz Rojas, Sergio},
  editor    = {Belz, Anya and
               Popovi{\'c}, Maja and
               Reiter, Ehud and
               Shimorina, Anastasia},
  booktitle = {Proceedings of the 2nd Workshop on Human Evaluation of NLP Systems (HumEval)},
  month     = may,
  year      = {2022},
  address   = {Dublin, Ireland},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2022.humeval-1.4},
  doi       = {10.18653/v1/2022.humeval-1.4},
  pages     = {32--41},
  abstract  = {Quality assessment has been an ongoing activity of the series of ParaCrawl efforts to crawl massive amounts of parallel data from multilingual websites for 29 languages. The goal of ParaCrawl is to get parallel data that is good for machine translation. To prove so, both, automatic (extrinsic) and human (intrinsic and extrinsic) evaluation tasks have been included as part of the quality assessment activity of the project. We sum up the various methods followed to address these evaluation tasks for the web-crawled corpora produced and their results. We review their advantages and disadvantages for the final goal of the ParaCrawl project and the related ongoing project MaCoCu.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS record for a single paper, wrapped in a one-item collection. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="ramirez-sanchez-etal-2022-human">
    <titleInfo>
      <title>Human evaluation of web-crawled parallel corpora for machine translation</title>
    </titleInfo>
    <!-- Paper authors, in byline order. -->
    <name type="personal">
      <namePart type="given">Gema</namePart>
      <namePart type="family">Ramírez-Sánchez</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Marta</namePart>
      <namePart type="family">Bañón</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jaume</namePart>
      <namePart type="family">Zaragoza-Bernabeu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sergio</namePart>
      <namePart type="family">Ortiz Rojas</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2022-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2nd Workshop on Human Evaluation of NLP Systems (HumEval)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Anya</namePart>
        <namePart type="family">Belz</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Maja</namePart>
        <namePart type="family">Popović</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ehud</namePart>
        <namePart type="family">Reiter</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Anastasia</namePart>
        <namePart type="family">Shimorina</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Dublin, Ireland</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Quality assessment has been an ongoing activity of the series of ParaCrawl efforts to crawl massive amounts of parallel data from multilingual websites for 29 languages. The goal of ParaCrawl is to get parallel data that is good for machine translation. To prove so, both, automatic (extrinsic) and human (intrinsic and extrinsic) evaluation tasks have been included as part of the quality assessment activity of the project. We sum up the various methods followed to address these evaluation tasks for the web-crawled corpora produced and their results. We review their advantages and disadvantages for the final goal of the ParaCrawl project and the related ongoing project MaCoCu.</abstract>
    <!-- Identifiers and landing page. -->
    <identifier type="citekey">ramirez-sanchez-etal-2022-human</identifier>
    <identifier type="doi">10.18653/v1/2022.humeval-1.4</identifier>
    <location>
      <url>https://aclanthology.org/2022.humeval-1.4</url>
    </location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2022-05</date>
      <extent unit="page">
        <start>32</start>
        <end>41</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Human evaluation of web-crawled parallel corpora for machine translation
%A Ramírez-Sánchez, Gema
%A Bañón, Marta
%A Zaragoza-Bernabeu, Jaume
%A Ortiz Rojas, Sergio
%Y Belz, Anya
%Y Popović, Maja
%Y Reiter, Ehud
%Y Shimorina, Anastasia
%S Proceedings of the 2nd Workshop on Human Evaluation of NLP Systems (HumEval)
%D 2022
%8 May
%I Association for Computational Linguistics
%C Dublin, Ireland
%F ramirez-sanchez-etal-2022-human
%X Quality assessment has been an ongoing activity of the series of ParaCrawl efforts to crawl massive amounts of parallel data from multilingual websites for 29 languages. The goal of ParaCrawl is to get parallel data that is good for machine translation. To prove so, both, automatic (extrinsic) and human (intrinsic and extrinsic) evaluation tasks have been included as part of the quality assessment activity of the project. We sum up the various methods followed to address these evaluation tasks for the web-crawled corpora produced and their results. We review their advantages and disadvantages for the final goal of the ParaCrawl project and the related ongoing project MaCoCu.
%R 10.18653/v1/2022.humeval-1.4
%U https://aclanthology.org/2022.humeval-1.4
%U https://doi.org/10.18653/v1/2022.humeval-1.4
%P 32-41
Markdown (Informal)
[Human evaluation of web-crawled parallel corpora for machine translation](https://aclanthology.org/2022.humeval-1.4) (Ramírez-Sánchez et al., HumEval 2022)
ACL