@inproceedings{manchanda-grunin-2020-domain,
title = "Domain Informed Neural Machine Translation: Developing Translation Services for Healthcare Enterprise",
author = "Manchanda, Sahil and
Grunin, Galina",
editor = "Martins, Andr{\'e} and
Moniz, Helena and
Fumega, Sara and
Martins, Bruno and
Batista, Fernando and
Coheur, Luisa and
Parra, Carla and
Trancoso, Isabel and
Turchi, Marco and
Bisazza, Arianna and
Moorkens, Joss and
Guerberof, Ana and
Nurminen, Mary and
Marg, Lena and
Forcada, Mikel L.",
booktitle = "Proceedings of the 22nd Annual Conference of the European Association for Machine Translation",
month = nov,
year = "2020",
address = "Lisboa, Portugal",
publisher = "European Association for Machine Translation",
url = "https://aclanthology.org/2020.eamt-1.27",
pages = "255--261",
abstract = "Neural Machine Translation (NMT) is a deep learning based approach that has achieved outstanding results lately in the translation community. The performance of NMT systems, however, is dependent on the availability of large amounts of in-domain parallel corpora. The business enterprises in domains such as legal and healthcare require specialized vocabulary but translation systems trained for a general purpose do not cater to these needs. The data in these domains is either hard to acquire or is very small in comparison to public data sets. This is a detailed report of using an open-source library to implement a machine translation system and successfully customizing it for the needs of a particular client in the healthcare domain. This report details the chronological development of every component of this system, namely, extraction of data from in-domain healthcare documents, a pre-processing pipeline for the data, data alignment and augmentation, training and a fully automated and robust deployment pipeline. This work proposes an efficient way for the continuous deployment of newly trained deep learning models. The deployed translation models are optimized for both inference time and cost.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="manchanda-grunin-2020-domain">
<titleInfo>
<title>Domain Informed Neural Machine Translation: Developing Translation Services for Healthcare Enterprise</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sahil</namePart>
<namePart type="family">Manchanda</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Galina</namePart>
<namePart type="family">Grunin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 22nd Annual Conference of the European Association for Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">André</namePart>
<namePart type="family">Martins</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Helena</namePart>
<namePart type="family">Moniz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Fumega</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bruno</namePart>
<namePart type="family">Martins</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fernando</namePart>
<namePart type="family">Batista</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luisa</namePart>
<namePart type="family">Coheur</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carla</namePart>
<namePart type="family">Parra</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Isabel</namePart>
<namePart type="family">Trancoso</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marco</namePart>
<namePart type="family">Turchi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arianna</namePart>
<namePart type="family">Bisazza</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joss</namePart>
<namePart type="family">Moorkens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ana</namePart>
<namePart type="family">Guerberof</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mary</namePart>
<namePart type="family">Nurminen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lena</namePart>
<namePart type="family">Marg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mikel</namePart>
<namePart type="given">L</namePart>
<namePart type="family">Forcada</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Association for Machine Translation</publisher>
<place>
<placeTerm type="text">Lisboa, Portugal</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Neural Machine Translation (NMT) is a deep learning based approach that has achieved outstanding results lately in the translation community. The performance of NMT systems, however, is dependent on the availability of large amounts of in-domain parallel corpora. The business enterprises in domains such as legal and healthcare require specialized vocabulary but translation systems trained for a general purpose do not cater to these needs. The data in these domains is either hard to acquire or is very small in comparison to public data sets. This is a detailed report of using an open-source library to implement a machine translation system and successfully customizing it for the needs of a particular client in the healthcare domain. This report details the chronological development of every component of this system, namely, extraction of data from in-domain healthcare documents, a pre-processing pipeline for the data, data alignment and augmentation, training and a fully automated and robust deployment pipeline. This work proposes an efficient way for the continuous deployment of newly trained deep learning models. The deployed translation models are optimized for both inference time and cost.</abstract>
<identifier type="citekey">manchanda-grunin-2020-domain</identifier>
<location>
<url>https://aclanthology.org/2020.eamt-1.27</url>
</location>
<part>
<date>2020-11</date>
<extent unit="page">
<start>255</start>
<end>261</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Domain Informed Neural Machine Translation: Developing Translation Services for Healthcare Enterprise
%A Manchanda, Sahil
%A Grunin, Galina
%Y Martins, André
%Y Moniz, Helena
%Y Fumega, Sara
%Y Martins, Bruno
%Y Batista, Fernando
%Y Coheur, Luisa
%Y Parra, Carla
%Y Trancoso, Isabel
%Y Turchi, Marco
%Y Bisazza, Arianna
%Y Moorkens, Joss
%Y Guerberof, Ana
%Y Nurminen, Mary
%Y Marg, Lena
%Y Forcada, Mikel L.
%S Proceedings of the 22nd Annual Conference of the European Association for Machine Translation
%D 2020
%8 November
%I European Association for Machine Translation
%C Lisboa, Portugal
%F manchanda-grunin-2020-domain
%X Neural Machine Translation (NMT) is a deep learning based approach that has achieved outstanding results lately in the translation community. The performance of NMT systems, however, is dependent on the availability of large amounts of in-domain parallel corpora. The business enterprises in domains such as legal and healthcare require specialized vocabulary but translation systems trained for a general purpose do not cater to these needs. The data in these domains is either hard to acquire or is very small in comparison to public data sets. This is a detailed report of using an open-source library to implement a machine translation system and successfully customizing it for the needs of a particular client in the healthcare domain. This report details the chronological development of every component of this system, namely, extraction of data from in-domain healthcare documents, a pre-processing pipeline for the data, data alignment and augmentation, training and a fully automated and robust deployment pipeline. This work proposes an efficient way for the continuous deployment of newly trained deep learning models. The deployed translation models are optimized for both inference time and cost.
%U https://aclanthology.org/2020.eamt-1.27
%P 255-261
Markdown (Informal)
[Domain Informed Neural Machine Translation: Developing Translation Services for Healthcare Enterprise](https://aclanthology.org/2020.eamt-1.27) (Manchanda & Grunin, EAMT 2020)
ACL