@article{pelemans-etal-2016-sparse,
title = "Sparse Non-negative Matrix Language Modeling",
author = "Pelemans, Joris and
Shazeer, Noam and
Chelba, Ciprian",
editor = "Lee, Lillian and
Johnson, Mark and
Toutanova, Kristina",
journal = "Transactions of the Association for Computational Linguistics",
volume = "4",
year = "2016",
address = "Cambridge, MA",
publisher = "MIT Press",
url = "https://aclanthology.org/Q16-1024",
doi = "10.1162/tacl_a_00102",
pages = "329--342",
abstract = "We present Sparse Non-negative Matrix (SNM) estimation, a novel probability estimation technique for language modeling that can efficiently incorporate arbitrary features. We evaluate SNM language models on two corpora: the One Billion Word Benchmark and a subset of the LDC English Gigaword corpus. Results show that SNM language models trained with n-gram features are a close match for the well-established Kneser-Ney models. The addition of skip-gram features yields a model that is in the same league as the state-of-the-art recurrent neural network language models, as well as complementary: combining the two modeling techniques yields the best known result on the One Billion Word Benchmark. On the Gigaword corpus further improvements are observed using features that cross sentence boundaries. The computational advantages of SNM estimation over both maximum entropy and neural network estimation are probably its main strength, promising an approach that has large flexibility in combining arbitrary features and yet scales gracefully to large amounts of data.",
}
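The abstract refers to n-gram and skip-gram features as the inputs to SNM estimation. As a point of reference only, the sketch below shows one common way to enumerate such context features; the function names, feature string format, and window/skip limits are illustrative assumptions, not the feature extraction used in the paper.

```python
# Illustrative sketch only: enumerating n-gram and skip-gram context
# features of the kind the abstract mentions. Window sizes, naming
# scheme, and the helpers themselves are assumptions, not the paper's
# actual feature set.

def ngram_features(context, max_order=4):
    """Suffixes of the context, up to max_order-1 words long."""
    feats = []
    for n in range(1, max_order):
        if len(context) >= n:
            feats.append("ngram:" + " ".join(context[-n:]))
    return feats

def skipgram_features(context, max_remote=2, max_skip=3, max_adjacent=2):
    """Pairs of a remote span and an adjacent span separated by a gap,
    e.g. 'quick [1] fox' with one word skipped in between."""
    feats = []
    for r in range(1, max_remote + 1):          # remote span length
        for s in range(1, max_skip + 1):        # number of skipped words
            for a in range(1, max_adjacent + 1):  # adjacent span length
                if r + s + a <= len(context):
                    remote = context[-(r + s + a):-(s + a)]
                    adjacent = context[-a:]
                    feats.append("skip:%s [%d] %s"
                                 % (" ".join(remote), s, " ".join(adjacent)))
    return feats

if __name__ == "__main__":
    ctx = "the quick brown fox jumps over".split()
    print(ngram_features(ctx))
    print(skipgram_features(ctx))
```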