@inproceedings{zaremoodi-etal-2018-adaptive,
title = "Adaptive Knowledge Sharing in Multi-Task Learning: Improving Low-Resource Neural Machine Translation",
author = "Zaremoodi, Poorya and
Buntine, Wray and
Haffari, Gholamreza",
editor = "Gurevych, Iryna and
Miyao, Yusuke",
booktitle = "Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)",
month = jul,
year = "2018",
address = "Melbourne, Australia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/P18-2104/",
doi = "10.18653/v1/P18-2104",
pages = "656--661",
abstract = "Neural Machine Translation (NMT) is notorious for its need for large amounts of bilingual data. An effective approach to compensate for this requirement is Multi-Task Learning (MTL) to leverage different linguistic resources as a source of inductive bias. Current MTL architectures are based on the Seq2Seq transduction, and (partially) share different components of the models among the tasks. However, this MTL approach often suffers from task interference and is not able to fully capture commonalities among subsets of tasks. We address this issue by extending the recurrent units with multiple {\textquotedblleft}blocks{\textquotedblright} along with a trainable {\textquotedblleft}routing network{\textquotedblright}. The routing network enables adaptive collaboration by dynamic sharing of blocks conditioned on the task at hand, input, and model state. Empirical evaluation of two low-resource translation tasks, English to Vietnamese and Farsi, show +1 BLEU score improvements compared to strong baselines."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zaremoodi-etal-2018-adaptive">
<titleInfo>
<title>Adaptive Knowledge Sharing in Multi-Task Learning: Improving Low-Resource Neural Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Poorya</namePart>
<namePart type="family">Zaremoodi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wray</namePart>
<namePart type="family">Buntine</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gholamreza</namePart>
<namePart type="family">Haffari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Iryna</namePart>
<namePart type="family">Gurevych</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yusuke</namePart>
<namePart type="family">Miyao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Melbourne, Australia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Neural Machine Translation (NMT) is notorious for its need for large amounts of bilingual data. An effective approach to compensate for this requirement is Multi-Task Learning (MTL) to leverage different linguistic resources as a source of inductive bias. Current MTL architectures are based on the Seq2Seq transduction, and (partially) share different components of the models among the tasks. However, this MTL approach often suffers from task interference and is not able to fully capture commonalities among subsets of tasks. We address this issue by extending the recurrent units with multiple “blocks” along with a trainable “routing network”. The routing network enables adaptive collaboration by dynamic sharing of blocks conditioned on the task at hand, input, and model state. Empirical evaluation of two low-resource translation tasks, English to Vietnamese and Farsi, show +1 BLEU score improvements compared to strong baselines.</abstract>
<identifier type="citekey">zaremoodi-etal-2018-adaptive</identifier>
<identifier type="doi">10.18653/v1/P18-2104</identifier>
<location>
<url>https://aclanthology.org/P18-2104/</url>
</location>
<part>
<date>2018-07</date>
<extent unit="page">
<start>656</start>
<end>661</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Adaptive Knowledge Sharing in Multi-Task Learning: Improving Low-Resource Neural Machine Translation
%A Zaremoodi, Poorya
%A Buntine, Wray
%A Haffari, Gholamreza
%Y Gurevych, Iryna
%Y Miyao, Yusuke
%S Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)
%D 2018
%8 July
%I Association for Computational Linguistics
%C Melbourne, Australia
%F zaremoodi-etal-2018-adaptive
%X Neural Machine Translation (NMT) is notorious for its need for large amounts of bilingual data. An effective approach to compensate for this requirement is Multi-Task Learning (MTL) to leverage different linguistic resources as a source of inductive bias. Current MTL architectures are based on the Seq2Seq transduction, and (partially) share different components of the models among the tasks. However, this MTL approach often suffers from task interference and is not able to fully capture commonalities among subsets of tasks. We address this issue by extending the recurrent units with multiple “blocks” along with a trainable “routing network”. The routing network enables adaptive collaboration by dynamic sharing of blocks conditioned on the task at hand, input, and model state. Empirical evaluation of two low-resource translation tasks, English to Vietnamese and Farsi, show +1 BLEU score improvements compared to strong baselines.
%R 10.18653/v1/P18-2104
%U https://aclanthology.org/P18-2104/
%U https://doi.org/10.18653/v1/P18-2104
%P 656-661
Markdown (Informal)
[Adaptive Knowledge Sharing in Multi-Task Learning: Improving Low-Resource Neural Machine Translation](https://aclanthology.org/P18-2104/) (Zaremoodi et al., ACL 2018)
ACL