% Conference paper (EAMT 2023 proceedings); the url field points at the aclanthology.org record.
% The words of the proper name at the end of the title are brace-protected as whole
% words so that sentence-casing bibliography styles keep their capitals.
% NOTE(review): three names may be compound surnames split after the last word
% (the "del Pozo", "Vidal" and "Escartin" entries) -- confirm the intended family
% names against the paper before relying on sorting or abbreviation.
@inproceedings{wirth-etal-2023-building,
  title     = {Building Machine Translation Tools for Patent Language: A Data Generation Strategy at the {European} {Patent} {Office}},
  author    = {Wirth, Matthias and
               H{\"a}hnke, Volker D. and
               Mascia, Franco and
               W{\'e}ry, Arnaud and
               Vowinckel, Konrad and
               del Rey, Marco and
               del Pozo, Ra{\'u}l Mohedano and
               Montes, Pau and
               Klenner-Bajaja, Alexander},
  editor    = {Nurminen, Mary and
               Brenner, Judith and
               Koponen, Maarit and
               Latomaa, Sirkku and
               Mikhailov, Mikhail and
               Schierl, Frederike and
               Ranasinghe, Tharindu and
               Vanmassenhove, Eva and
               Vidal, Sergi Alvarez and
               Aranberri, Nora and
               Nunziatini, Mara and
               Escart{\'\i}n, Carla Parra and
               Forcada, Mikel and
               Popovic, Maja and
               Scarton, Carolina and
               Moniz, Helena},
  booktitle = {Proceedings of the 24th Annual Conference of the European Association for Machine Translation},
  month     = jun,
  year      = {2023},
  address   = {Tampere, Finland},
  publisher = {European Association for Machine Translation},
  url       = {https://aclanthology.org/2023.eamt-1.46},
  pages     = {471--479},
  abstract  = {The European Patent Office (EPO) is an international organisation responsible for granting patents and promoting global cooperation in the intellectual property world. With three official languages (English, German, French) and a need to constantly access and manipulate information in multiple languages, machine translation is essential for the EPO. Over the last years we have developed internal machine translation engines, specifically for the translation of patent language. This article presents our data generation strategy: it describes our approach to the generation of parallel corpora of documents, training datasets of aligned sentences, and respective evaluation datasets. Details on the challenges and technical implementation are presented, as well as statistics of the training dataset generation process.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record (ID wirth-etal-2023-building):
     title, nine author names, the host proceedings with its editors,
     abstract, citekey, URL and page extent. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wirth-etal-2023-building">
<titleInfo>
<title>Building Machine Translation Tools for Patent Language: A Data Generation Strategy at the European Patent Office</title>
</titleInfo>
<!-- Personal names with role term "author"; a multi-word given name is
     stored as several consecutive given nameParts. -->
<name type="personal">
<namePart type="given">Matthias</namePart>
<namePart type="family">Wirth</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Volker</namePart>
<namePart type="given">D</namePart>
<namePart type="family">Hähnke</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Franco</namePart>
<namePart type="family">Mascia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arnaud</namePart>
<namePart type="family">Wéry</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Konrad</namePart>
<namePart type="family">Vowinckel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marco</namePart>
<namePart type="family">del Rey</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Raúl</namePart>
<namePart type="given">Mohedano</namePart>
<namePart type="family">del Pozo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pau</namePart>
<namePart type="family">Montes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexander</namePart>
<namePart type="family">Klenner-Bajaja</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<!-- Host item: the proceedings volume containing this paper, with its
     sixteen editor names, publisher, place and genre. -->
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 24th Annual Conference of the European Association for Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mary</namePart>
<namePart type="family">Nurminen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Judith</namePart>
<namePart type="family">Brenner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maarit</namePart>
<namePart type="family">Koponen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sirkku</namePart>
<namePart type="family">Latomaa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mikhail</namePart>
<namePart type="family">Mikhailov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Frederike</namePart>
<namePart type="family">Schierl</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tharindu</namePart>
<namePart type="family">Ranasinghe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eva</namePart>
<namePart type="family">Vanmassenhove</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sergi</namePart>
<namePart type="given">Alvarez</namePart>
<namePart type="family">Vidal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nora</namePart>
<namePart type="family">Aranberri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mara</namePart>
<namePart type="family">Nunziatini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carla</namePart>
<namePart type="given">Parra</namePart>
<namePart type="family">Escartín</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mikel</namePart>
<namePart type="family">Forcada</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maja</namePart>
<namePart type="family">Popovic</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carolina</namePart>
<namePart type="family">Scarton</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Helena</namePart>
<namePart type="family">Moniz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Association for Machine Translation</publisher>
<place>
<placeTerm type="text">Tampere, Finland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The European Patent Office (EPO) is an international organisation responsible for granting patents and promoting global cooperation in the intellectual property world. With three official languages (English, German, French) and a need to constantly access and manipulate information in multiple languages, machine translation is essential for the EPO. Over the last years we have developed internal machine translation engines, specifically for the translation of patent language. This article presents our data generation strategy: it describes our approach to the generation of parallel corpora of documents, training datasets of aligned sentences, and respective evaluation datasets. Details on the challenges and technical implementation are presented, as well as statistics of the training dataset generation process.</abstract>
<identifier type="citekey">wirth-etal-2023-building</identifier>
<location>
<url>https://aclanthology.org/2023.eamt-1.46</url>
</location>
<!-- Part details: date plus the page extent (start and end page). -->
<part>
<date>2023-06</date>
<extent unit="page">
<start>471</start>
<end>479</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Building Machine Translation Tools for Patent Language: A Data Generation Strategy at the European Patent Office
%A Wirth, Matthias
%A Hähnke, Volker D.
%A Mascia, Franco
%A Wéry, Arnaud
%A Vowinckel, Konrad
%A del Rey, Marco
%A del Pozo, Raúl Mohedano
%A Montes, Pau
%A Klenner-Bajaja, Alexander
%Y Nurminen, Mary
%Y Brenner, Judith
%Y Koponen, Maarit
%Y Latomaa, Sirkku
%Y Mikhailov, Mikhail
%Y Schierl, Frederike
%Y Ranasinghe, Tharindu
%Y Vanmassenhove, Eva
%Y Vidal, Sergi Alvarez
%Y Aranberri, Nora
%Y Nunziatini, Mara
%Y Escartín, Carla Parra
%Y Forcada, Mikel
%Y Popovic, Maja
%Y Scarton, Carolina
%Y Moniz, Helena
%S Proceedings of the 24th Annual Conference of the European Association for Machine Translation
%D 2023
%8 June
%I European Association for Machine Translation
%C Tampere, Finland
%F wirth-etal-2023-building
%X The European Patent Office (EPO) is an international organisation responsible for granting patents and promoting global cooperation in the intellectual property world. With three official languages (English, German, French) and a need to constantly access and manipulate information in multiple languages, machine translation is essential for the EPO. Over the last years we have developed internal machine translation engines, specifically for the translation of patent language. This article presents our data generation strategy: it describes our approach to the generation of parallel corpora of documents, training datasets of aligned sentences, and respective evaluation datasets. Details on the challenges and technical implementation are presented, as well as statistics of the training dataset generation process.
%U https://aclanthology.org/2023.eamt-1.46
%P 471-479
Markdown (Informal)
[Building Machine Translation Tools for Patent Language: A Data Generation Strategy at the European Patent Office](https://aclanthology.org/2023.eamt-1.46) (Wirth et al., EAMT 2023)
ACL
- Matthias Wirth, Volker D. Hähnke, Franco Mascia, Arnaud Wéry, Konrad Vowinckel, Marco del Rey, Raúl Mohedano del Pozo, Pau Montes, and Alexander Klenner-Bajaja. 2023. Building Machine Translation Tools for Patent Language: A Data Generation Strategy at the European Patent Office. In Proceedings of the 24th Annual Conference of the European Association for Machine Translation, pages 471–479, Tampere, Finland. European Association for Machine Translation.