@inproceedings{suresh-etal-2019-distilling,
  title     = {Distilling weighted finite automata from arbitrary probabilistic models},
  author    = {Suresh, Ananda Theertha and Roark, Brian and Riley, Michael and Schogol, Vlad},
  editor    = {Vogler, Heiko and Maletti, Andreas},
  booktitle = {Proceedings of the 14th International Conference on Finite-State Methods and Natural Language Processing},
  month     = sep,
  year      = {2019},
  address   = {Dresden, Germany},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/W19-3112},
  doi       = {10.18653/v1/W19-3112},
  pages     = {87--97},
  abstract  = {Weighted finite automata (WFA) are often used to represent probabilistic models, such as n-gram language models, since they are efficient for recognition tasks in time and space. The probabilistic source to be represented as a WFA, however, may come in many forms. Given a generic probabilistic model over sequences, we propose an algorithm to approximate it as a weighted finite automaton such that the Kullback-Leibler divergence between the source model and the WFA target model is minimized. The proposed algorithm involves a counting step and a difference of convex optimization, both of which can be performed efficiently. We demonstrate the usefulness of our approach on some tasks including distilling n-gram models from neural models.},
}

<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the paper with citekey suresh-etal-2019-distilling. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="suresh-etal-2019-distilling">
    <titleInfo>
      <title>Distilling weighted finite automata from arbitrary probabilistic models</title>
    </titleInfo>
    <!-- Authors of the paper, in byline order. -->
    <name type="personal">
      <namePart type="given">Ananda</namePart>
      <namePart type="given">Theertha</namePart>
      <namePart type="family">Suresh</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Brian</namePart>
      <namePart type="family">Roark</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Michael</namePart>
      <namePart type="family">Riley</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Vlad</namePart>
      <namePart type="family">Schogol</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2019-09</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 14th International Conference on Finite-State Methods and Natural Language Processing</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Heiko</namePart>
        <namePart type="family">Vogler</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Andreas</namePart>
        <namePart type="family">Maletti</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Dresden, Germany</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Weighted finite automata (WFA) are often used to represent probabilistic models, such as n-gram language models, since they are efficient for recognition tasks in time and space. The probabilistic source to be represented as a WFA, however, may come in many forms. Given a generic probabilistic model over sequences, we propose an algorithm to approximate it as a weighted finite automaton such that the Kullback-Leibler divergence between the source model and the WFA target model is minimized. The proposed algorithm involves a counting step and a difference of convex optimization, both of which can be performed efficiently. We demonstrate the usefulness of our approach on some tasks including distilling n-gram models from neural models.</abstract>
    <!-- Identifiers and access location. -->
    <identifier type="citekey">suresh-etal-2019-distilling</identifier>
    <identifier type="doi">10.18653/v1/W19-3112</identifier>
    <location>
      <url>https://aclanthology.org/W19-3112</url>
    </location>
    <!-- Issue date and page extent within the host volume. -->
    <part>
      <date>2019-09</date>
      <extent unit="page">
        <start>87</start>
        <end>97</end>
      </extent>
    </part>
  </mods>
</modsCollection>

%0 Conference Proceedings
%T Distilling weighted finite automata from arbitrary probabilistic models
%A Suresh, Ananda Theertha
%A Roark, Brian
%A Riley, Michael
%A Schogol, Vlad
%Y Vogler, Heiko
%Y Maletti, Andreas
%S Proceedings of the 14th International Conference on Finite-State Methods and Natural Language Processing
%D 2019
%8 September
%I Association for Computational Linguistics
%C Dresden, Germany
%F suresh-etal-2019-distilling
%X Weighted finite automata (WFA) are often used to represent probabilistic models, such as n-gram language models, since they are efficient for recognition tasks in time and space. The probabilistic source to be represented as a WFA, however, may come in many forms. Given a generic probabilistic model over sequences, we propose an algorithm to approximate it as a weighted finite automaton such that the Kullback-Leibler divergence between the source model and the WFA target model is minimized. The proposed algorithm involves a counting step and a difference of convex optimization, both of which can be performed efficiently. We demonstrate the usefulness of our approach on some tasks including distilling n-gram models from neural models.
%R 10.18653/v1/W19-3112
%U https://aclanthology.org/W19-3112
%U https://doi.org/10.18653/v1/W19-3112
%P 87-97

##### Markdown (Informal)

[Distilling weighted finite automata from arbitrary probabilistic models](https://aclanthology.org/W19-3112) (Suresh et al., FSMNLP 2019)

##### ACL