@inproceedings{zaremoodi-haffari-2019-adaptively,
title = "Adaptively Scheduled Multitask Learning: The Case of Low-Resource Neural Machine Translation",
author = "Zaremoodi, Poorya and
Haffari, Gholamreza",
editor = "Birch, Alexandra and
Finch, Andrew and
Hayashi, Hiroaki and
Konstas, Ioannis and
Luong, Thang and
Neubig, Graham and
Oda, Yusuke and
Sudoh, Katsuhito",
booktitle = "Proceedings of the 3rd Workshop on Neural Generation and Translation",
month = nov,
year = "2019",
address = "Hong Kong",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/D19-5618",
doi = "10.18653/v1/D19-5618",
pages = "177--186",
abstract = "Neural Machine Translation (NMT), a data-hungry technology, suffers from the lack of bilingual data in low-resource scenarios. Multitask learning (MTL) can alleviate this issue by injecting inductive biases into NMT, using auxiliary syntactic and semantic tasks. However, an effective \textit{training schedule} is required to balance the importance of tasks to get the best use of the training signal. The role of training schedule becomes even more crucial in \textit{biased-MTL} where the goal is to improve one (or a subset) of tasks the most, e.g. translation quality. Current approaches for biased-MTL are based on brittle \textit{hand-engineered} heuristics that require trial and error, and should be (re-)designed for each learning scenario. To the best of our knowledge, ours is the first work on \textit{adaptively} and \textit{dynamically} changing the training schedule in biased-MTL. We propose a rigorous approach for automatically reweighing the training data of the main and auxiliary tasks throughout the training process based on their contributions to the generalisability of the main NMT task. Our experiments on translating from English to Vietnamese/Turkish/Spanish show improvements of up to +1.2 BLEU points, compared to strong baselines. Additionally, our analyses shed light on the dynamic of needs throughout the training of NMT: from syntax to semantic.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zaremoodi-haffari-2019-adaptively">
<titleInfo>
<title>Adaptively Scheduled Multitask Learning: The Case of Low-Resource Neural Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Poorya</namePart>
<namePart type="family">Zaremoodi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gholamreza</namePart>
<namePart type="family">Haffari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 3rd Workshop on Neural Generation and Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Alexandra</namePart>
<namePart type="family">Birch</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andrew</namePart>
<namePart type="family">Finch</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hiroaki</namePart>
<namePart type="family">Hayashi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ioannis</namePart>
<namePart type="family">Konstas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thang</namePart>
<namePart type="family">Luong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Graham</namePart>
<namePart type="family">Neubig</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yusuke</namePart>
<namePart type="family">Oda</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Katsuhito</namePart>
<namePart type="family">Sudoh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Hong Kong</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Neural Machine Translation (NMT), a data-hungry technology, suffers from the lack of bilingual data in low-resource scenarios. Multitask learning (MTL) can alleviate this issue by injecting inductive biases into NMT, using auxiliary syntactic and semantic tasks. However, an effective training schedule is required to balance the importance of tasks to get the best use of the training signal. The role of training schedule becomes even more crucial in biased-MTL where the goal is to improve one (or a subset) of tasks the most, e.g. translation quality. Current approaches for biased-MTL are based on brittle hand-engineered heuristics that require trial and error, and should be (re-)designed for each learning scenario. To the best of our knowledge, ours is the first work on adaptively and dynamically changing the training schedule in biased-MTL. We propose a rigorous approach for automatically reweighing the training data of the main and auxiliary tasks throughout the training process based on their contributions to the generalisability of the main NMT task. Our experiments on translating from English to Vietnamese/Turkish/Spanish show improvements of up to +1.2 BLEU points, compared to strong baselines. Additionally, our analyses shed light on the dynamic of needs throughout the training of NMT: from syntax to semantic.</abstract>
<identifier type="citekey">zaremoodi-haffari-2019-adaptively</identifier>
<identifier type="doi">10.18653/v1/D19-5618</identifier>
<location>
<url>https://aclanthology.org/D19-5618</url>
</location>
<part>
<date>2019-11</date>
<extent unit="page">
<start>177</start>
<end>186</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Adaptively Scheduled Multitask Learning: The Case of Low-Resource Neural Machine Translation
%A Zaremoodi, Poorya
%A Haffari, Gholamreza
%Y Birch, Alexandra
%Y Finch, Andrew
%Y Hayashi, Hiroaki
%Y Konstas, Ioannis
%Y Luong, Thang
%Y Neubig, Graham
%Y Oda, Yusuke
%Y Sudoh, Katsuhito
%S Proceedings of the 3rd Workshop on Neural Generation and Translation
%D 2019
%8 November
%I Association for Computational Linguistics
%C Hong Kong
%F zaremoodi-haffari-2019-adaptively
%X Neural Machine Translation (NMT), a data-hungry technology, suffers from the lack of bilingual data in low-resource scenarios. Multitask learning (MTL) can alleviate this issue by injecting inductive biases into NMT, using auxiliary syntactic and semantic tasks. However, an effective training schedule is required to balance the importance of tasks to get the best use of the training signal. The role of training schedule becomes even more crucial in biased-MTL where the goal is to improve one (or a subset) of tasks the most, e.g. translation quality. Current approaches for biased-MTL are based on brittle hand-engineered heuristics that require trial and error, and should be (re-)designed for each learning scenario. To the best of our knowledge, ours is the first work on adaptively and dynamically changing the training schedule in biased-MTL. We propose a rigorous approach for automatically reweighing the training data of the main and auxiliary tasks throughout the training process based on their contributions to the generalisability of the main NMT task. Our experiments on translating from English to Vietnamese/Turkish/Spanish show improvements of up to +1.2 BLEU points, compared to strong baselines. Additionally, our analyses shed light on the dynamic of needs throughout the training of NMT: from syntax to semantic.
%R 10.18653/v1/D19-5618
%U https://aclanthology.org/D19-5618
%U https://doi.org/10.18653/v1/D19-5618
%P 177-186
Markdown (Informal)
[Adaptively Scheduled Multitask Learning: The Case of Low-Resource Neural Machine Translation](https://aclanthology.org/D19-5618) (Zaremoodi & Haffari, NGT 2019)
ACL