% Workshop paper from MMTLRL 2021 (ACL Anthology 2021.mmtlrl-1.4).
% Thoudam Doren Singh is both an author and an editor; both fields use the
% same "Singh, Thoudam Doren" form so the name is parsed identically.
@inproceedings{sanayai-meetei-etal-2021-low,
    title = {Low Resource Multimodal Neural Machine Translation of {English}-{Hindi} in News Domain},
    author = {Sanayai Meetei, Loitongbam and
      Singh, Thoudam Doren and
      Bandyopadhyay, Sivaji},
    editor = {Singh, Thoudam Doren and
      Espa{\~n}a i Bonet, Cristina and
      Bandyopadhyay, Sivaji and
      van Genabith, Josef},
    booktitle = {Proceedings of the First Workshop on Multimodal Machine Translation for Low Resource Languages ({MMTLRL} 2021)},
    month = sep,
    year = {2021},
    address = {Online (Virtual Mode)},
    publisher = {INCOMA Ltd.},
    url = {https://aclanthology.org/2021.mmtlrl-1.4},
    pages = {20--29},
    abstract = {Incorporating multiple input modalities in a machine translation (MT) system is gaining popularity among MT researchers. Unlike the publicly available dataset for Multimodal Machine Translation (MMT) tasks, where the captions are short image descriptions, the news captions provide a more detailed description of the contents of the images. As a result, numerous named entities relating to specific persons, locations, etc., are found. In this paper, we acquire two monolingual news datasets reported in English and Hindi paired with the images to generate a synthetic English-Hindi parallel corpus. The parallel corpus is used to train the English-Hindi Neural Machine Translation (NMT) and an English-Hindi MMT system by incorporating the image feature paired with the corresponding parallel corpus. We also conduct a systematic analysis to evaluate the English-Hindi MT systems with 1) more synthetic data and 2) by adding back-translated data. Our finding shows improvement in terms of BLEU scores for both the NMT (+8.05) and MMT (+11.03) systems.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sanayai-meetei-etal-2021-low">
<titleInfo>
<title>Low Resource Multimodal Neural Machine Translation of English-Hindi in News Domain</title>
</titleInfo>
<name type="personal">
<namePart type="given">Loitongbam</namePart>
<namePart type="family">Sanayai Meetei</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thoudam</namePart>
<namePart type="given">Doren</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sivaji</namePart>
<namePart type="family">Bandyopadhyay</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Multimodal Machine Translation for Low Resource Languages (MMTLRL 2021)</title>
</titleInfo>
<name type="personal">
<!-- Same person as the author "Thoudam Doren Singh"; name parts are split the same way. -->
<namePart type="given">Thoudam</namePart>
<namePart type="given">Doren</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cristina</namePart>
<namePart type="family">España i Bonet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sivaji</namePart>
<namePart type="family">Bandyopadhyay</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Josef</namePart>
<namePart type="family">van Genabith</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd.</publisher>
<place>
<placeTerm type="text">Online (Virtual Mode)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Incorporating multiple input modalities in a machine translation (MT) system is gaining popularity among MT researchers. Unlike the publicly available dataset for Multimodal Machine Translation (MMT) tasks, where the captions are short image descriptions, the news captions provide a more detailed description of the contents of the images. As a result, numerous named entities relating to specific persons, locations, etc., are found. In this paper, we acquire two monolingual news datasets reported in English and Hindi paired with the images to generate a synthetic English-Hindi parallel corpus. The parallel corpus is used to train the English-Hindi Neural Machine Translation (NMT) and an English-Hindi MMT system by incorporating the image feature paired with the corresponding parallel corpus. We also conduct a systematic analysis to evaluate the English-Hindi MT systems with 1) more synthetic data and 2) by adding back-translated data. Our finding shows improvement in terms of BLEU scores for both the NMT (+8.05) and MMT (+11.03) systems.</abstract>
<identifier type="citekey">sanayai-meetei-etal-2021-low</identifier>
<location>
<url>https://aclanthology.org/2021.mmtlrl-1.4</url>
</location>
<part>
<date>2021-09</date>
<extent unit="page">
<start>20</start>
<end>29</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Low Resource Multimodal Neural Machine Translation of English-Hindi in News Domain
%A Sanayai Meetei, Loitongbam
%A Singh, Thoudam Doren
%A Bandyopadhyay, Sivaji
%Y Singh, Thoudam Doren
%Y España i Bonet, Cristina
%Y Bandyopadhyay, Sivaji
%Y van Genabith, Josef
%S Proceedings of the First Workshop on Multimodal Machine Translation for Low Resource Languages (MMTLRL 2021)
%D 2021
%8 September
%I INCOMA Ltd.
%C Online (Virtual Mode)
%F sanayai-meetei-etal-2021-low
%X Incorporating multiple input modalities in a machine translation (MT) system is gaining popularity among MT researchers. Unlike the publicly available dataset for Multimodal Machine Translation (MMT) tasks, where the captions are short image descriptions, the news captions provide a more detailed description of the contents of the images. As a result, numerous named entities relating to specific persons, locations, etc., are found. In this paper, we acquire two monolingual news datasets reported in English and Hindi paired with the images to generate a synthetic English-Hindi parallel corpus. The parallel corpus is used to train the English-Hindi Neural Machine Translation (NMT) and an English-Hindi MMT system by incorporating the image feature paired with the corresponding parallel corpus. We also conduct a systematic analysis to evaluate the English-Hindi MT systems with 1) more synthetic data and 2) by adding back-translated data. Our finding shows improvement in terms of BLEU scores for both the NMT (+8.05) and MMT (+11.03) systems.
%U https://aclanthology.org/2021.mmtlrl-1.4
%P 20-29
Markdown (Informal)
[Low Resource Multimodal Neural Machine Translation of English-Hindi in News Domain](https://aclanthology.org/2021.mmtlrl-1.4) (Sanayai Meetei et al., MMTLRL 2021)
ACL