BibTeX
@article{zhang-duh-2020-reproducible,
title = "Reproducible and Efficient Benchmarks for Hyperparameter Optimization of Neural Machine Translation Systems",
author = "Zhang, Xuan and
Duh, Kevin",
editor = "Johnson, Mark and
Roark, Brian and
Nenkova, Ani",
journal = "Transactions of the Association for Computational Linguistics",
volume = "8",
year = "2020",
address = "Cambridge, MA",
publisher = "MIT Press",
url = "https://aclanthology.org/2020.tacl-1.26",
doi = "10.1162/tacl_a_00322",
pages = "393--408",
abstract = "Hyperparameter selection is a crucial part of building neural machine translation (NMT) systems across both academia and industry. Fine-grained adjustments to a model{'}s architecture or training recipe can mean the difference between a positive and negative research result or between a state-of-the-art and underperforming system. While recent literature has proposed methods for automatic hyperparameter optimization (HPO), there has been limited work on applying these methods to neural machine translation (NMT), due in part to the high costs associated with experiments that train large numbers of model variants. To facilitate research in this space, we introduce a lookup-based approach that uses a library of pre-trained models for fast, low cost HPO experimentation. Our contributions include (1) the release of a large collection of trained NMT models covering a wide range of hyperparameters, (2) the proposal of targeted metrics for evaluating HPO methods on NMT, and (3) a reproducible benchmark of several HPO methods against our model library, including novel graph-based and multiobjective methods.",
}
MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhang-duh-2020-reproducible">
    <titleInfo>
        <title>Reproducible and Efficient Benchmarks for Hyperparameter Optimization of Neural Machine Translation Systems</title>
    </titleInfo>
    <name type="personal">
        <namePart type="given">Xuan</namePart>
        <namePart type="family">Zhang</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Kevin</namePart>
        <namePart type="family">Duh</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <originInfo>
        <dateIssued>2020</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <genre authority="bibutilsgt">journal article</genre>
    <relatedItem type="host">
        <titleInfo>
            <title>Transactions of the Association for Computational Linguistics</title>
        </titleInfo>
        <originInfo>
            <issuance>continuing</issuance>
            <publisher>MIT Press</publisher>
            <place>
                <placeTerm type="text">Cambridge, MA</placeTerm>
            </place>
        </originInfo>
        <genre authority="marcgt">periodical</genre>
        <genre authority="bibutilsgt">academic journal</genre>
    </relatedItem>
    <abstract>Hyperparameter selection is a crucial part of building neural machine translation (NMT) systems across both academia and industry. Fine-grained adjustments to a model’s architecture or training recipe can mean the difference between a positive and negative research result or between a state-of-the-art and underperforming system. While recent literature has proposed methods for automatic hyperparameter optimization (HPO), there has been limited work on applying these methods to neural machine translation (NMT), due in part to the high costs associated with experiments that train large numbers of model variants. To facilitate research in this space, we introduce a lookup-based approach that uses a library of pre-trained models for fast, low cost HPO experimentation. Our contributions include (1) the release of a large collection of trained NMT models covering a wide range of hyperparameters, (2) the proposal of targeted metrics for evaluating HPO methods on NMT, and (3) a reproducible benchmark of several HPO methods against our model library, including novel graph-based and multiobjective methods.</abstract>
    <identifier type="citekey">zhang-duh-2020-reproducible</identifier>
    <identifier type="doi">10.1162/tacl_a_00322</identifier>
    <location>
        <url>https://aclanthology.org/2020.tacl-1.26</url>
    </location>
    <part>
        <date>2020</date>
        <detail type="volume"><number>8</number></detail>
        <extent unit="page">
            <start>393</start>
            <end>408</end>
        </extent>
    </part>
</mods>
</modsCollection>
Endnote
%0 Journal Article
%T Reproducible and Efficient Benchmarks for Hyperparameter Optimization of Neural Machine Translation Systems
%A Zhang, Xuan
%A Duh, Kevin
%J Transactions of the Association for Computational Linguistics
%D 2020
%V 8
%I MIT Press
%C Cambridge, MA
%F zhang-duh-2020-reproducible
%X Hyperparameter selection is a crucial part of building neural machine translation (NMT) systems across both academia and industry. Fine-grained adjustments to a model’s architecture or training recipe can mean the difference between a positive and negative research result or between a state-of-the-art and underperforming system. While recent literature has proposed methods for automatic hyperparameter optimization (HPO), there has been limited work on applying these methods to neural machine translation (NMT), due in part to the high costs associated with experiments that train large numbers of model variants. To facilitate research in this space, we introduce a lookup-based approach that uses a library of pre-trained models for fast, low cost HPO experimentation. Our contributions include (1) the release of a large collection of trained NMT models covering a wide range of hyperparameters, (2) the proposal of targeted metrics for evaluating HPO methods on NMT, and (3) a reproducible benchmark of several HPO methods against our model library, including novel graph-based and multiobjective methods.
%R 10.1162/tacl_a_00322
%U https://aclanthology.org/2020.tacl-1.26
%U https://doi.org/10.1162/tacl_a_00322
%P 393-408
Markdown (Informal)
[Reproducible and Efficient Benchmarks for Hyperparameter Optimization of Neural Machine Translation Systems](https://aclanthology.org/2020.tacl-1.26) (Zhang & Duh, TACL 2020)
ACL
Xuan Zhang and Kevin Duh. 2020. Reproducible and Efficient Benchmarks for Hyperparameter Optimization of Neural Machine Translation Systems. Transactions of the Association for Computational Linguistics, 8:393–408.
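
As an informal illustration of the lookup-based HPO idea the abstract describes (querying a library of pre-trained model results instead of training each candidate), here is a minimal Python sketch. The table contents, configuration encoding, and `random_search` baseline are all hypothetical placeholders, not the authors' released model library or benchmark code.

```python
import random

# Hypothetical lookup table: hyperparameter configuration -> (BLEU, GPU hours).
# In the paper, this role is played by a released library of pre-trained NMT models.
MODEL_LIBRARY = {
    ("layers=2", "emb=256", "lr=3e-4"): (28.1, 5.0),
    ("layers=4", "emb=512", "lr=3e-4"): (30.4, 11.5),
    ("layers=4", "emb=512", "lr=1e-3"): (29.7, 11.5),
    ("layers=6", "emb=512", "lr=3e-4"): (30.9, 18.0),
}

def evaluate(config):
    """Replace an expensive training run with an O(1) table lookup."""
    bleu, _gpu_hours = MODEL_LIBRARY[config]
    return bleu

def random_search(budget, seed=0):
    """Toy HPO baseline: sample `budget` configs and return the best one found."""
    rng = random.Random(seed)
    candidates = rng.sample(sorted(MODEL_LIBRARY), min(budget, len(MODEL_LIBRARY)))
    return max(candidates, key=evaluate)

if __name__ == "__main__":
    best = random_search(budget=3)
    print("best config:", best, "-> BLEU", evaluate(best))
```

Because every evaluation is a table lookup, an HPO method can be benchmarked over many random seeds and budgets at negligible cost, which is what makes the reproducible comparisons in the paper practical.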