@inproceedings{kumar-etal-2021-multilingual,
title = "Multilingual Multi-Domain {NMT} for {I}ndian Languages",
author = "Kumar, Sourav and
Aggarwal, Salil and
Sharma, Dipti",
editor = "Mitkov, Ruslan and
Angelova, Galia",
booktitle = "Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021)",
month = sep,
year = "2021",
address = "Held Online",
publisher = "INCOMA Ltd.",
url = "https://aclanthology.org/2021.ranlp-1.83",
pages = "727--733",
abstract = "India is known as the land of many tongues and dialects. Neural machine translation (NMT) is the current state-of-the-art approach for machine translation (MT) but performs better only with large datasets which Indian languages usually lack, making this approach infeasible. So, in this paper, we address the problem of data scarcity by efficiently training multilingual and multilingual multi domain NMT systems involving languages of the ๐๐ง๐๐ข๐๐ง ๐ฌ๐ฎ๐๐๐จ๐ง๐ญ๐ข๐ง๐๐ง๐ญ. We are proposing the technique for using the joint domain and language tags in a multilingual setup. We draw three major conclusions from our experiments: (i) Training a multilingual system via exploiting lexical similarity based on language family helps in achieving an overall average improvement of ๐.๐๐ ๐๐๐๐ ๐ฉ๐จ๐ข๐ง๐ญ๐ฌ over bilingual baselines, (ii) Technique of incorporating domain information into the language tokens helps multilingual multi-domain system in getting a significant average improvement of ๐ ๐๐๐๐ ๐ฉ๐จ๐ข๐ง๐ญ๐ฌ over the baselines, (iii) Multistage fine-tuning further helps in getting an improvement of ๐-๐.๐ ๐๐๐๐ ๐ฉ๐จ๐ข๐ง๐ญ๐ฌ for the language pair of interest.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kumar-etal-2021-multilingual">
<titleInfo>
<title>Multilingual Multi-Domain NMT for Indian Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sourav</namePart>
<namePart type="family">Kumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Salil</namePart>
<namePart type="family">Aggarwal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dipti</namePart>
<namePart type="family">Sharma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Galia</namePart>
<namePart type="family">Angelova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd.</publisher>
<place>
<placeTerm type="text">Held Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>India is known as the land of many tongues and dialects. Neural machine translation (NMT) is the current state-of-the-art approach for machine translation (MT) but performs well only with large datasets, which Indian languages usually lack, making this approach infeasible. In this paper, we address the problem of data scarcity by efficiently training multilingual and multilingual multi-domain NMT systems involving languages of the Indian subcontinent. We propose a technique for using joint domain and language tags in a multilingual setup. We draw three major conclusions from our experiments: (i) training a multilingual system that exploits lexical similarity based on language family achieves an overall average improvement of 3.25 BLEU points over bilingual baselines, (ii) incorporating domain information into the language tokens gives the multilingual multi-domain system a significant average improvement of 6 BLEU points over the baselines, and (iii) multistage fine-tuning yields a further improvement of 1-1.5 BLEU points for the language pair of interest.</abstract>
<identifier type="citekey">kumar-etal-2021-multilingual</identifier>
<location>
<url>https://aclanthology.org/2021.ranlp-1.83</url>
</location>
<part>
<date>2021-09</date>
<extent unit="page">
<start>727</start>
<end>733</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Multilingual Multi-Domain NMT for Indian Languages
%A Kumar, Sourav
%A Aggarwal, Salil
%A Sharma, Dipti
%Y Mitkov, Ruslan
%Y Angelova, Galia
%S Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021)
%D 2021
%8 September
%I INCOMA Ltd.
%C Held Online
%F kumar-etal-2021-multilingual
%X India is known as the land of many tongues and dialects. Neural machine translation (NMT) is the current state-of-the-art approach for machine translation (MT) but performs well only with large datasets, which Indian languages usually lack, making this approach infeasible. In this paper, we address the problem of data scarcity by efficiently training multilingual and multilingual multi-domain NMT systems involving languages of the Indian subcontinent. We propose a technique for using joint domain and language tags in a multilingual setup. We draw three major conclusions from our experiments: (i) training a multilingual system that exploits lexical similarity based on language family achieves an overall average improvement of 3.25 BLEU points over bilingual baselines, (ii) incorporating domain information into the language tokens gives the multilingual multi-domain system a significant average improvement of 6 BLEU points over the baselines, and (iii) multistage fine-tuning yields a further improvement of 1-1.5 BLEU points for the language pair of interest.
%U https://aclanthology.org/2021.ranlp-1.83
%P 727-733
Markdown (Informal)
[Multilingual Multi-Domain NMT for Indian Languages](https://aclanthology.org/2021.ranlp-1.83) (Kumar et al., RANLP 2021)
ACL
- Sourav Kumar, Salil Aggarwal, and Dipti Sharma. 2021. Multilingual Multi-Domain NMT for Indian Languages. In Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021), pages 727–733, Held Online. INCOMA Ltd.
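
The abstract describes conditioning a single multilingual NMT model on joint language and domain tags prepended to the source sentence. As a minimal sketch of that idea: the tag format (`<2hi>`, `<health>`, etc.), the `tag_source` helper, and the sample sentences below are illustrative assumptions, not the paper's exact token scheme, which is not given in this entry.

```python
# Sketch of joint language + domain tagging for multilingual
# multi-domain NMT, following the idea in the abstract above.
# Tag names and sentences are hypothetical examples.

def tag_source(sentence: str, tgt_lang: str, domain: str) -> str:
    """Prepend a joint target-language and domain token to the source."""
    return f"<2{tgt_lang}> <{domain}> {sentence}"

# (source sentence, target language code, domain) triples
corpus = [
    ("this medicine should be taken twice a day", "hi", "health"),
    ("the court adjourned the hearing", "te", "judicial"),
]

# The tagged sentences are fed to one shared encoder-decoder model,
# which learns to condition its output on both tokens.
for src, lang, dom in corpus:
    print(tag_source(src, lang, dom))
```

On this reading, multistage fine-tuning would then start from the shared multilingual multi-domain model and continue training on progressively narrower data (e.g., the language pair of interest, then its target domain), which is consistent with the abstract's reported per-pair gains.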