@article{schofield-mimno-2016-comparing,
title = "Comparing Apples to Apple: The Effects of Stemmers on Topic Models",
author = "Schofield, Alexandra and
Mimno, David",
editor = "Lee, Lillian and
Johnson, Mark and
Toutanova, Kristina",
journal = "Transactions of the Association for Computational Linguistics",
volume = "4",
year = "2016",
address = "Cambridge, MA",
publisher = "MIT Press",
url = "https://aclanthology.org/Q16-1021/",
doi = "10.1162/tacl_a_00099",
pages = "287--300",
abstract = "Rule-based stemmers such as the Porter stemmer are frequently used to preprocess English corpora for topic modeling. In this work, we train and evaluate topic models on a variety of corpora using several different stemming algorithms. We examine several different quantitative measures of the resulting models, including likelihood, coherence, model stability, and entropy. Despite their frequent use in topic modeling, we find that stemmers produce no meaningful improvement in likelihood and coherence and in fact can degrade topic stability."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="schofield-mimno-2016-comparing">
<titleInfo>
<title>Comparing Apples to Apple: The Effects of Stemmers on Topic Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Alexandra</namePart>
<namePart type="family">Schofield</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Mimno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2016</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<genre authority="bibutilsgt">journal article</genre>
<relatedItem type="host">
<titleInfo>
<title>Transactions of the Association for Computational Linguistics</title>
</titleInfo>
<originInfo>
<issuance>continuing</issuance>
<publisher>MIT Press</publisher>
<place>
<placeTerm type="text">Cambridge, MA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">periodical</genre>
<genre authority="bibutilsgt">academic journal</genre>
</relatedItem>
<abstract>Rule-based stemmers such as the Porter stemmer are frequently used to preprocess English corpora for topic modeling. In this work, we train and evaluate topic models on a variety of corpora using several different stemming algorithms. We examine several different quantitative measures of the resulting models, including likelihood, coherence, model stability, and entropy. Despite their frequent use in topic modeling, we find that stemmers produce no meaningful improvement in likelihood and coherence and in fact can degrade topic stability.</abstract>
<identifier type="citekey">schofield-mimno-2016-comparing</identifier>
<identifier type="doi">10.1162/tacl_a_00099</identifier>
<location>
<url>https://aclanthology.org/Q16-1021/</url>
</location>
<part>
<date>2016</date>
<detail type="volume"><number>4</number></detail>
<extent unit="page">
<start>287</start>
<end>300</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Journal Article
%T Comparing Apples to Apple: The Effects of Stemmers on Topic Models
%A Schofield, Alexandra
%A Mimno, David
%J Transactions of the Association for Computational Linguistics
%D 2016
%V 4
%I MIT Press
%C Cambridge, MA
%F schofield-mimno-2016-comparing
%X Rule-based stemmers such as the Porter stemmer are frequently used to preprocess English corpora for topic modeling. In this work, we train and evaluate topic models on a variety of corpora using several different stemming algorithms. We examine several different quantitative measures of the resulting models, including likelihood, coherence, model stability, and entropy. Despite their frequent use in topic modeling, we find that stemmers produce no meaningful improvement in likelihood and coherence and in fact can degrade topic stability.
%R 10.1162/tacl_a_00099
%U https://aclanthology.org/Q16-1021/
%U https://doi.org/10.1162/tacl_a_00099
%P 287-300
Markdown (Informal)
[Comparing Apples to Apple: The Effects of Stemmers on Topic Models](https://aclanthology.org/Q16-1021/) (Schofield & Mimno, TACL 2016)
ACL