@inproceedings{liu-etal-2018-distilling,
title = "Distilling Knowledge for Search-based Structured Prediction",
author = "Liu, Yijia and
Che, Wanxiang and
Zhao, Huaipeng and
Qin, Bing and
Liu, Ting",
editor = "Gurevych, Iryna and
Miyao, Yusuke",
booktitle = "Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = jul,
year = "2018",
address = "Melbourne, Australia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/P18-1129",
doi = "10.18653/v1/P18-1129",
pages = "1393--1402",
abstract = "Many natural language processing tasks can be modeled into structured prediction and solved as a search problem. In this paper, we distill an ensemble of multiple models trained with different initialization into a single model. In addition to learning to match the ensemble{'}s probability output on the reference states, we also use the ensemble to explore the search space and learn from the encountered states in the exploration. Experimental results on two typical search-based structured prediction tasks {--} transition-based dependency parsing and neural machine translation show that distillation can effectively improve the single model{'}s performance and the final model achieves improvements of 1.32 in LAS and 2.65 in BLEU score on these two tasks respectively over strong baselines and it outperforms the greedy structured prediction models in previous literatures.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="liu-etal-2018-distilling">
    <titleInfo>
      <title>Distilling Knowledge for Search-based Structured Prediction</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Yijia</namePart>
      <namePart type="family">Liu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Wanxiang</namePart>
      <namePart type="family">Che</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Huaipeng</namePart>
      <namePart type="family">Zhao</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Bing</namePart>
      <namePart type="family">Qin</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ting</namePart>
      <namePart type="family">Liu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2018-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Iryna</namePart>
        <namePart type="family">Gurevych</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yusuke</namePart>
        <namePart type="family">Miyao</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Melbourne, Australia</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Many natural language processing tasks can be modeled as structured prediction and solved as a search problem. In this paper, we distill an ensemble of multiple models trained with different initializations into a single model. In addition to learning to match the ensemble’s probability output on the reference states, we also use the ensemble to explore the search space and learn from the states encountered during that exploration. Experimental results on two typical search-based structured prediction tasks (transition-based dependency parsing and neural machine translation) show that distillation effectively improves the single model’s performance: the final model achieves improvements of 1.32 LAS and 2.65 BLEU over strong baselines on the two tasks, respectively, and outperforms the greedy structured prediction models in the previous literature.</abstract>
<identifier type="citekey">liu-etal-2018-distilling</identifier>
<identifier type="doi">10.18653/v1/P18-1129</identifier>
<location>
<url>https://aclanthology.org/P18-1129</url>
</location>
<part>
<date>2018-07</date>
<extent unit="page">
<start>1393</start>
<end>1402</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Distilling Knowledge for Search-based Structured Prediction
%A Liu, Yijia
%A Che, Wanxiang
%A Zhao, Huaipeng
%A Qin, Bing
%A Liu, Ting
%Y Gurevych, Iryna
%Y Miyao, Yusuke
%S Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2018
%8 July
%I Association for Computational Linguistics
%C Melbourne, Australia
%F liu-etal-2018-distilling
%X Many natural language processing tasks can be modeled as structured prediction and solved as a search problem. In this paper, we distill an ensemble of multiple models trained with different initializations into a single model. In addition to learning to match the ensemble’s probability output on the reference states, we also use the ensemble to explore the search space and learn from the states encountered during that exploration. Experimental results on two typical search-based structured prediction tasks (transition-based dependency parsing and neural machine translation) show that distillation effectively improves the single model’s performance: the final model achieves improvements of 1.32 LAS and 2.65 BLEU over strong baselines on the two tasks, respectively, and outperforms the greedy structured prediction models in the previous literature.
%R 10.18653/v1/P18-1129
%U https://aclanthology.org/P18-1129
%U https://doi.org/10.18653/v1/P18-1129
%P 1393-1402
Markdown (Informal)
[Distilling Knowledge for Search-based Structured Prediction](https://aclanthology.org/P18-1129) (Liu et al., ACL 2018)
ACL
- Yijia Liu, Wanxiang Che, Huaipeng Zhao, Bing Qin, and Ting Liu. 2018. Distilling Knowledge for Search-based Structured Prediction. In Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 1393–1402, Melbourne, Australia. Association for Computational Linguistics.
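
The abstract outlines two distillation signals: matching the teacher ensemble's action distribution on reference (gold) states, and matching it on states reached while the ensemble itself explores the search space. The sketch below is a minimal, hypothetical illustration of that objective in PyTorch, not the authors' implementation; the helper names (`ensemble_distribution`, `distill_loss`, `explore`) and the toy transition function are assumptions made for the example.

```python
# Minimal sketch (hypothetical, not the paper's code) of ensemble
# distillation for a search-based model: the student matches the
# averaged action distribution of independently initialized teachers,
# both on reference states and on states the ensemble explores.
import torch
import torch.nn.functional as F

def ensemble_distribution(teachers, state):
    """Average the teachers' softmax outputs to form the soft target."""
    probs = torch.stack([F.softmax(t(state), dim=-1) for t in teachers])
    return probs.mean(dim=0)

def distill_loss(student, teachers, state):
    """Cross-entropy between the student's action distribution and the
    ensemble's soft target on one search state."""
    with torch.no_grad():
        target = ensemble_distribution(teachers, state)
    log_probs = F.log_softmax(student(state), dim=-1)
    return -(target * log_probs).sum()

def explore(teachers, state, transition):
    """Advance the search by sampling an action from the ensemble;
    `transition` is a task-specific state update (a placeholder here)."""
    action = torch.multinomial(ensemble_distribution(teachers, state), 1).item()
    return transition(state, action)

# Toy usage: linear "parsers" over a 16-dim state with 8 actions.
teachers = [torch.nn.Linear(16, 8) for _ in range(3)]
student = torch.nn.Linear(16, 8)
state = torch.randn(16)
loss = distill_loss(student, teachers, state)            # reference state
state = explore(teachers, state,
                lambda s, a: s + 0.01 * a)               # dummy transition
loss = loss + distill_loss(student, teachers, state)     # explored state
loss.backward()
```

In this toy setup the same loss term serves both signals; only the origin of the state differs, which mirrors the abstract's distinction between learning on reference states and learning on states encountered during the ensemble's exploration.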