@inproceedings{parshakova-etal-2019-global,
title = "Global Autoregressive Models for Data-Efficient Sequence Learning",
author = "Parshakova, Tetiana and
Andreoli, Jean-Marc and
Dymetman, Marc",
editor = "Bansal, Mohit and
Villavicencio, Aline",
booktitle = "Proceedings of the 23rd Conference on Computational Natural Language Learning (CoNLL)",
month = nov,
year = "2019",
address = "Hong Kong, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/K19-1084",
doi = "10.18653/v1/K19-1084",
pages = "900--909",
abstract = "Standard autoregressive seq2seq models are easily trained by max-likelihood, but tend to show poor results under small-data conditions. We introduce a class of seq2seq models, GAMs (Global Autoregressive Models), which combine an autoregressive component with a log-linear component, allowing the use of global \textit{a priori} features to compensate for lack of data. We train these models in two steps. In the first step, we obtain an \textit{unnormalized} GAM that maximizes the likelihood of the data, but is improper for fast inference or evaluation. In the second step, we use this GAM to train (by distillation) a second autoregressive model that approximates the \textit{normalized} distribution associated with the GAM, and can be used for fast inference and evaluation. Our experiments focus on language modelling under synthetic conditions and show a strong perplexity reduction of using the second autoregressive model over the standard one.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="parshakova-etal-2019-global">
<titleInfo>
<title>Global Autoregressive Models for Data-Efficient Sequence Learning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tetiana</namePart>
<namePart type="family">Parshakova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jean-Marc</namePart>
<namePart type="family">Andreoli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marc</namePart>
<namePart type="family">Dymetman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 23rd Conference on Computational Natural Language Learning (CoNLL)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aline</namePart>
<namePart type="family">Villavicencio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Hong Kong, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
    <abstract>Standard autoregressive seq2seq models are easily trained by max-likelihood, but tend to show poor results under small-data conditions. We introduce a class of seq2seq models, GAMs (Global Autoregressive Models), which combine an autoregressive component with a log-linear component, allowing the use of global a priori features to compensate for lack of data. We train these models in two steps. In the first step, we obtain an unnormalized GAM that maximizes the likelihood of the data, but is improper for fast inference or evaluation. In the second step, we use this GAM to train (by distillation) a second autoregressive model that approximates the normalized distribution associated with the GAM, and can be used for fast inference and evaluation. Our experiments focus on language modelling under synthetic conditions and show a strong perplexity reduction from using the second autoregressive model over the standard one.</abstract>
<identifier type="citekey">parshakova-etal-2019-global</identifier>
<identifier type="doi">10.18653/v1/K19-1084</identifier>
<location>
<url>https://aclanthology.org/K19-1084</url>
</location>
<part>
<date>2019-11</date>
<extent unit="page">
<start>900</start>
<end>909</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Global Autoregressive Models for Data-Efficient Sequence Learning
%A Parshakova, Tetiana
%A Andreoli, Jean-Marc
%A Dymetman, Marc
%Y Bansal, Mohit
%Y Villavicencio, Aline
%S Proceedings of the 23rd Conference on Computational Natural Language Learning (CoNLL)
%D 2019
%8 November
%I Association for Computational Linguistics
%C Hong Kong, China
%F parshakova-etal-2019-global
%X Standard autoregressive seq2seq models are easily trained by max-likelihood, but tend to show poor results under small-data conditions. We introduce a class of seq2seq models, GAMs (Global Autoregressive Models), which combine an autoregressive component with a log-linear component, allowing the use of global a priori features to compensate for lack of data. We train these models in two steps. In the first step, we obtain an unnormalized GAM that maximizes the likelihood of the data, but is improper for fast inference or evaluation. In the second step, we use this GAM to train (by distillation) a second autoregressive model that approximates the normalized distribution associated with the GAM, and can be used for fast inference and evaluation. Our experiments focus on language modelling under synthetic conditions and show a strong perplexity reduction from using the second autoregressive model over the standard one.
%R 10.18653/v1/K19-1084
%U https://aclanthology.org/K19-1084
%U https://doi.org/10.18653/v1/K19-1084
%P 900-909
Markdown (Informal)
[Global Autoregressive Models for Data-Efficient Sequence Learning](https://aclanthology.org/K19-1084) (Parshakova et al., CoNLL 2019)
ACL
Tetiana Parshakova, Jean-Marc Andreoli, and Marc Dymetman. 2019. Global Autoregressive Models for Data-Efficient Sequence Learning. In Proceedings of the 23rd Conference on Computational Natural Language Learning (CoNLL), pages 900–909, Hong Kong, China. Association for Computational Linguistics.
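
For readers who want the abstract's description in symbols, the following is a minimal LaTeX sketch of the model form it summarizes; the notation (r_\eta, \phi, \lambda, Z) is illustrative and not quoted from the paper.

% Sketch of the GAM described in the abstract (illustrative notation, not the paper's exact symbols):
%   r_\eta(x)  -- the autoregressive component
%   \phi(x)    -- a vector of global a priori features of the whole sequence x
%   \lambda    -- log-linear weights on those features
%   Z          -- the partition function, which is what makes direct inference and evaluation hard
\[
  P_{\eta,\lambda}(x) \;=\; \frac{1}{Z_{\eta,\lambda}}\, r_\eta(x)\, e^{\langle \lambda,\, \phi(x) \rangle},
  \qquad
  Z_{\eta,\lambda} \;=\; \sum_{x'} r_\eta(x')\, e^{\langle \lambda,\, \phi(x') \rangle}.
\]
% Two-step training as summarized in the abstract:
%   (1) fit the unnormalized GAM by maximizing the data log-likelihood
%       \sum_i \big( \log r_\eta(x_i) + \langle \lambda, \phi(x_i) \rangle - \log Z_{\eta,\lambda} \big);
%   (2) distill P_{\eta,\lambda} into a second autoregressive model q_\theta, e.g. by minimizing
%       the cross-entropy H(P_{\eta,\lambda}, q_\theta), so that q_\theta supports fast inference and evaluation.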