% Conference paper record for aclanthology.org/2021.triton-1.11 (TRITON 2021 proceedings).
% The abstract reproduces the published wording; non-ASCII characters are LaTeX-escaped.
@inproceedings{rivera-trigueros-olvera-lobo-2021-building,
    title = "Building a Corpus for Corporate Websites Machine Translation Evaluation. A Step by Step Methodological Approach",
    author = "Rivera-Trigueros, Irene and
      Olvera-Lobo, Mar{\'\i}a-Dolores",
    editor = "Mitkov, Ruslan and
      Sosoni, Vilelmini and
      Gigu{\`e}re, Julie Christine and
      Murgolo, Elena and
      Deysel, Elizabeth",
    booktitle = "Proceedings of the Translation and Interpreting Technology Online Conference",
    month = jul,
    year = "2021",
    address = "Held Online",
    publisher = "INCOMA Ltd.",
    url = "https://aclanthology.org/2021.triton-1.11",
    pages = "93--101",
    abstract = "The aim of this paper is to describe the process carried out to develop a parallel corpus comprised of texts extracted from the corporate websites of southern Spanish SMEs from the sanitary sector which will serve as the basis for MT quality assessment. The stages for compiling the parallel corpora were: (i) selection of websites with content translated in English and Spanish, (ii) downloading of the HTML files of the selected websites, (iii) files filtering and pairing of English files with their Spanish equivalents, (iv) compilation of individual corpora (EN and ES) for each of the selected websites, (v) merging of the individual corpora into a two general corpus one in English and the other in Spanish, (vi) selection a representative sample of segments to be used as original (ES) and reference translations (EN), (vii) building of the parallel corpus intended for MT evaluation. The parallel corpus generated will serve to future Machine Translation quality assessment. In addition, the monolingual corpora generated during the process could as a base to carry out research focused on linguistic {--} bilingual or monolingual {--} analysis.",
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the conference paper with citekey rivera-trigueros-olvera-lobo-2021-building. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="rivera-trigueros-olvera-lobo-2021-building">
    <!-- Paper title -->
    <titleInfo>
      <title>Building a Corpus for Corporate Websites Machine Translation Evaluation. A Step by Step Methodological Approach</title>
    </titleInfo>
    <!-- Paper authors, in byline order -->
    <name type="personal">
      <namePart type="given">Irene</namePart>
      <namePart type="family">Rivera-Trigueros</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">María-Dolores</namePart>
      <namePart type="family">Olvera-Lobo</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2021-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher and place -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Translation and Interpreting Technology Online Conference</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Ruslan</namePart>
        <namePart type="family">Mitkov</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Vilelmini</namePart>
        <namePart type="family">Sosoni</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Julie</namePart>
        <namePart type="given">Christine</namePart>
        <namePart type="family">Giguère</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Elena</namePart>
        <namePart type="family">Murgolo</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Elizabeth</namePart>
        <namePart type="family">Deysel</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>INCOMA Ltd.</publisher>
        <place>
          <placeTerm type="text">Held Online</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <!-- Abstract, reproducing the published wording -->
    <abstract>The aim of this paper is to describe the process carried out to develop a parallel corpus comprised of texts extracted from the corporate websites of southern Spanish SMEs from the sanitary sector which will serve as the basis for MT quality assessment. The stages for compiling the parallel corpora were: (i) selection of websites with content translated in English and Spanish, (ii) downloading of the HTML files of the selected websites, (iii) files filtering and pairing of English files with their Spanish equivalents, (iv) compilation of individual corpora (EN and ES) for each of the selected websites, (v) merging of the individual corpora into a two general corpus one in English and the other in Spanish, (vi) selection a representative sample of segments to be used as original (ES) and reference translations (EN), (vii) building of the parallel corpus intended for MT evaluation. The parallel corpus generated will serve to future Machine Translation quality assessment. In addition, the monolingual corpora generated during the process could as a base to carry out research focused on linguistic – bilingual or monolingual – analysis.</abstract>
    <identifier type="citekey">rivera-trigueros-olvera-lobo-2021-building</identifier>
    <location>
      <url>https://aclanthology.org/2021.triton-1.11</url>
    </location>
    <!-- Position of the paper within the host volume -->
    <part>
      <date>2021-07</date>
      <extent unit="page">
        <start>93</start>
        <end>101</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Building a Corpus for Corporate Websites Machine Translation Evaluation. A Step by Step Methodological Approach
%A Rivera-Trigueros, Irene
%A Olvera-Lobo, María-Dolores
%Y Mitkov, Ruslan
%Y Sosoni, Vilelmini
%Y Giguère, Julie Christine
%Y Murgolo, Elena
%Y Deysel, Elizabeth
%S Proceedings of the Translation and Interpreting Technology Online Conference
%D 2021
%8 July
%I INCOMA Ltd.
%C Held Online
%F rivera-trigueros-olvera-lobo-2021-building
%X The aim of this paper is to describe the process carried out to develop a parallel corpus comprised of texts extracted from the corporate websites of southern Spanish SMEs from the sanitary sector which will serve as the basis for MT quality assessment. The stages for compiling the parallel corpora were: (i) selection of websites with content translated in English and Spanish, (ii) downloading of the HTML files of the selected websites, (iii) files filtering and pairing of English files with their Spanish equivalents, (iv) compilation of individual corpora (EN and ES) for each of the selected websites, (v) merging of the individual corpora into a two general corpus one in English and the other in Spanish, (vi) selection a representative sample of segments to be used as original (ES) and reference translations (EN), (vii) building of the parallel corpus intended for MT evaluation. The parallel corpus generated will serve to future Machine Translation quality assessment. In addition, the monolingual corpora generated during the process could as a base to carry out research focused on linguistic – bilingual or monolingual – analysis.
%U https://aclanthology.org/2021.triton-1.11
%P 93-101
Markdown (Informal)
[Building a Corpus for Corporate Websites Machine Translation Evaluation. A Step by Step Methodological Approach](https://aclanthology.org/2021.triton-1.11) (Rivera-Trigueros & Olvera-Lobo, TRITON 2021)
ACL