@inproceedings{neishi-etal-2017-bag,
title = "A Bag of Useful Tricks for Practical Neural Machine Translation: Embedding Layer Initialization and Large Batch Size",
author = "Neishi, Masato and
Sakuma, Jin and
Tohda, Satoshi and
Ishiwatari, Shonosuke and
Yoshinaga, Naoki and
Toyoda, Masashi",
editor = "Nakazawa, Toshiaki and
Goto, Isao",
booktitle = "Proceedings of the 4th Workshop on {A}sian Translation ({WAT}2017)",
month = nov,
year = "2017",
address = "Taipei, Taiwan",
publisher = "Asian Federation of Natural Language Processing",
url = "https://aclanthology.org/W17-5708/",
pages = "99--109",
abstract = "In this paper, we describe the team UT-IIS`s system and results for the WAT 2017 translation tasks. We further investigated several tricks including a novel technique for initializing embedding layers using only the parallel corpus, which increased the BLEU score by 1.28, found a practical large batch size of 256, and gained insights regarding hyperparameter settings. Ultimately, our system obtained a better result than the state-of-the-art system of WAT 2016. Our code is available on \url{https://github.com/nem6ishi/wat17}."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="neishi-etal-2017-bag">
<titleInfo>
<title>A Bag of Useful Tricks for Practical Neural Machine Translation: Embedding Layer Initialization and Large Batch Size</title>
</titleInfo>
<name type="personal">
<namePart type="given">Masato</namePart>
<namePart type="family">Neishi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jin</namePart>
<namePart type="family">Sakuma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Satoshi</namePart>
<namePart type="family">Tohda</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shonosuke</namePart>
<namePart type="family">Ishiwatari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Naoki</namePart>
<namePart type="family">Yoshinaga</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Masashi</namePart>
<namePart type="family">Toyoda</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 4th Workshop on Asian Translation (WAT2017)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Toshiaki</namePart>
<namePart type="family">Nakazawa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Isao</namePart>
<namePart type="family">Goto</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Asian Federation of Natural Language Processing</publisher>
<place>
<placeTerm type="text">Taipei, Taiwan</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we describe the team UT-IIS‘s system and results for the WAT 2017 translation tasks. We further investigated several tricks including a novel technique for initializing embedding layers using only the parallel corpus, which increased the BLEU score by 1.28, found a practical large batch size of 256, and gained insights regarding hyperparameter settings. Ultimately, our system obtained a better result than the state-of-the-art system of WAT 2016. Our code is available on https://github.com/nem6ishi/wat17.</abstract>
<identifier type="citekey">neishi-etal-2017-bag</identifier>
<location>
<url>https://aclanthology.org/W17-5708/</url>
</location>
<part>
<date>2017-11</date>
<extent unit="page">
<start>99</start>
<end>109</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Bag of Useful Tricks for Practical Neural Machine Translation: Embedding Layer Initialization and Large Batch Size
%A Neishi, Masato
%A Sakuma, Jin
%A Tohda, Satoshi
%A Ishiwatari, Shonosuke
%A Yoshinaga, Naoki
%A Toyoda, Masashi
%Y Nakazawa, Toshiaki
%Y Goto, Isao
%S Proceedings of the 4th Workshop on Asian Translation (WAT2017)
%D 2017
%8 November
%I Asian Federation of Natural Language Processing
%C Taipei, Taiwan
%F neishi-etal-2017-bag
%X In this paper, we describe the team UT-IIS‘s system and results for the WAT 2017 translation tasks. We further investigated several tricks including a novel technique for initializing embedding layers using only the parallel corpus, which increased the BLEU score by 1.28, found a practical large batch size of 256, and gained insights regarding hyperparameter settings. Ultimately, our system obtained a better result than the state-of-the-art system of WAT 2016. Our code is available on https://github.com/nem6ishi/wat17.
%U https://aclanthology.org/W17-5708/
%P 99-109
Markdown (Informal)
[A Bag of Useful Tricks for Practical Neural Machine Translation: Embedding Layer Initialization and Large Batch Size](https://aclanthology.org/W17-5708/) (Neishi et al., WAT 2017)
ACL