@inproceedings{rieger-etal-2021-rollinglda-update,
    title = "{RollingLDA}: {A}n Update Algorithm of {L}atent {D}irichlet {A}llocation to Construct Consistent Time Series from Textual Data",
    author = {Rieger, Jonas and
      Jentsch, Carsten and
      Rahnenf{\"u}hrer, J{\"o}rg},
    editor = "Moens, Marie-Francine and
      Huang, Xuanjing and
      Specia, Lucia and
      Yih, Scott Wen-tau",
    booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2021",
    month = nov,
    year = "2021",
    address = "Punta Cana, Dominican Republic",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2021.findings-emnlp.201",
    doi = "10.18653/v1/2021.findings-emnlp.201",
    pages = "2337--2347",
    abstract = "We propose a rolling version of the Latent Dirichlet Allocation, called RollingLDA. By a sequential approach, it enables the construction of LDA-based time series of topics that are consistent with previous states of LDA models. After an initial modeling, updates can be computed efficiently, allowing for real-time monitoring and detection of events or structural breaks. For this purpose, we propose suitable similarity measures for topics and provide simulation evidence of superiority over other commonly used approaches. The adequacy of the resulting method is illustrated by an application to an example corpus. In particular, we compute the similarity of sequentially obtained topic and word distributions over consecutive time periods. For a representative example corpus consisting of The New York Times articles from 1980 to 2020, we analyze the effect of several tuning parameter choices and we run the RollingLDA method on the full dataset of approximately 4 million articles to demonstrate its feasibility.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="rieger-etal-2021-rollinglda-update">
    <titleInfo>
        <title>RollingLDA: An Update Algorithm of Latent Dirichlet Allocation to Construct Consistent Time Series from Textual Data</title>
    </titleInfo>
    <name type="personal">
        <namePart type="given">Jonas</namePart>
        <namePart type="family">Rieger</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Carsten</namePart>
        <namePart type="family">Jentsch</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Jörg</namePart>
        <namePart type="family">Rahnenführer</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <originInfo>
        <dateIssued>2021-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
        <titleInfo>
            <title>Findings of the Association for Computational Linguistics: EMNLP 2021</title>
        </titleInfo>
        <name type="personal">
            <namePart type="given">Marie-Francine</namePart>
            <namePart type="family">Moens</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Xuanjing</namePart>
            <namePart type="family">Huang</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Lucia</namePart>
            <namePart type="family">Specia</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Scott</namePart>
            <namePart type="given">Wen-tau</namePart>
            <namePart type="family">Yih</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <originInfo>
            <publisher>Association for Computational Linguistics</publisher>
            <place>
                <placeTerm type="text">Punta Cana, Dominican Republic</placeTerm>
            </place>
        </originInfo>
        <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>We propose a rolling version of the Latent Dirichlet Allocation, called RollingLDA. By a sequential approach, it enables the construction of LDA-based time series of topics that are consistent with previous states of LDA models. After an initial modeling, updates can be computed efficiently, allowing for real-time monitoring and detection of events or structural breaks. For this purpose, we propose suitable similarity measures for topics and provide simulation evidence of superiority over other commonly used approaches. The adequacy of the resulting method is illustrated by an application to an example corpus. In particular, we compute the similarity of sequentially obtained topic and word distributions over consecutive time periods. For a representative example corpus consisting of The New York Times articles from 1980 to 2020, we analyze the effect of several tuning parameter choices and we run the RollingLDA method on the full dataset of approximately 4 million articles to demonstrate its feasibility.</abstract>
    <identifier type="citekey">rieger-etal-2021-rollinglda-update</identifier>
    <identifier type="doi">10.18653/v1/2021.findings-emnlp.201</identifier>
    <location>
        <url>https://aclanthology.org/2021.findings-emnlp.201</url>
    </location>
    <part>
        <date>2021-11</date>
        <extent unit="page">
            <start>2337</start>
            <end>2347</end>
        </extent>
    </part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T RollingLDA: An Update Algorithm of Latent Dirichlet Allocation to Construct Consistent Time Series from Textual Data
%A Rieger, Jonas
%A Jentsch, Carsten
%A Rahnenführer, Jörg
%Y Moens, Marie-Francine
%Y Huang, Xuanjing
%Y Specia, Lucia
%Y Yih, Scott Wen-tau
%S Findings of the Association for Computational Linguistics: EMNLP 2021
%D 2021
%8 November
%I Association for Computational Linguistics
%C Punta Cana, Dominican Republic
%F rieger-etal-2021-rollinglda-update
%X We propose a rolling version of the Latent Dirichlet Allocation, called RollingLDA. By a sequential approach, it enables the construction of LDA-based time series of topics that are consistent with previous states of LDA models. After an initial modeling, updates can be computed efficiently, allowing for real-time monitoring and detection of events or structural breaks. For this purpose, we propose suitable similarity measures for topics and provide simulation evidence of superiority over other commonly used approaches. The adequacy of the resulting method is illustrated by an application to an example corpus. In particular, we compute the similarity of sequentially obtained topic and word distributions over consecutive time periods. For a representative example corpus consisting of The New York Times articles from 1980 to 2020, we analyze the effect of several tuning parameter choices and we run the RollingLDA method on the full dataset of approximately 4 million articles to demonstrate its feasibility.
%R 10.18653/v1/2021.findings-emnlp.201
%U https://aclanthology.org/2021.findings-emnlp.201
%U https://doi.org/10.18653/v1/2021.findings-emnlp.201
%P 2337-2347
Markdown (Informal)
[RollingLDA: An Update Algorithm of Latent Dirichlet Allocation to Construct Consistent Time Series from Textual Data](https://aclanthology.org/2021.findings-emnlp.201) (Rieger et al., Findings 2021)
ACL
Jonas Rieger, Carsten Jentsch, and Jörg Rahnenführer. 2021. [RollingLDA: An Update Algorithm of Latent Dirichlet Allocation to Construct Consistent Time Series from Textual Data](https://aclanthology.org/2021.findings-emnlp.201). In *Findings of the Association for Computational Linguistics: EMNLP 2021*, pages 2337–2347, Punta Cana, Dominican Republic. Association for Computational Linguistics.
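
The abstract describes the core mechanic: fit an LDA model once, then update it chunk by chunk so that topics remain comparable over time, and track how similar each topic's word distribution is across consecutive time periods. The sketch below illustrates only that rolling loop; it is not the authors' RollingLDA algorithm or implementation (the paper proposes its own update scheme and topic similarity measures). It substitutes gensim's online variational LDA update, a single fixed vocabulary, toy time chunks, two topics, and cosine similarity, all of which are illustrative assumptions.

```python
# Minimal sketch of a rolling LDA update loop with per-topic similarity tracking.
# NOT the paper's RollingLDA: gensim's online variational update stands in for the
# authors' method, and cosine similarity stands in for their proposed topic
# similarity measures. Chunks, vocabulary handling, and the topic count are toy
# assumptions for illustration.
import numpy as np
from gensim.corpora import Dictionary
from gensim.models import LdaModel

# Toy "time chunks": each chunk is a list of tokenized documents from one period.
chunks = [
    [["economy", "inflation", "market"], ["election", "vote", "senate"]],
    [["market", "stocks", "inflation"], ["vote", "campaign", "senate"]],
    [["virus", "vaccine", "health"], ["stocks", "market", "rally"]],
]

# Simplification: build one fixed vocabulary over all chunks up front.
dictionary = Dictionary(doc for chunk in chunks for doc in chunk)
num_topics = 2

# Initial model on the first chunk.
lda = LdaModel(
    corpus=[dictionary.doc2bow(doc) for doc in chunks[0]],
    id2word=dictionary,
    num_topics=num_topics,
    passes=10,
    random_state=0,
)
previous_topics = lda.get_topics()  # (num_topics, vocab_size) word distributions


def cosine(a, b):
    """Cosine similarity between two topic-word distributions."""
    return float(a @ b / (np.linalg.norm(a) * np.linalg.norm(b)))


# Sequential updates: one update per new chunk, then compare each topic's word
# distribution with its state after the previous chunk.
for t, chunk in enumerate(chunks[1:], start=1):
    lda.update([dictionary.doc2bow(doc) for doc in chunk])
    current_topics = lda.get_topics()
    sims = [cosine(previous_topics[k], current_topics[k]) for k in range(num_topics)]
    print(f"chunk {t}: topic self-similarities = {np.round(sims, 3)}")
    previous_topics = current_topics
```

A drop in such a per-topic similarity series is the kind of signal the abstract refers to when it mentions monitoring for events or structural breaks; everything beyond this loop-and-compare structure is specific to the paper and not reproduced here.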