@inproceedings{boldsen-etal-2019-identifying,
title = "Identifying Temporal Trends Based on Perplexity and Clustering: Are We Looking at Language Change?",
author = "Boldsen, Sidsel and
Agirrezabal, Manex and
Paggio, Patrizia",
editor = "Tahmasebi, Nina and
Borin, Lars and
Jatowt, Adam and
Xu, Yang",
booktitle = "Proceedings of the 1st International Workshop on Computational Approaches to Historical Language Change",
month = aug,
year = "2019",
address = "Florence, Italy",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W19-4711/",
doi = "10.18653/v1/W19-4711",
pages = "86--91",
abstract = "In this work we propose a data-driven methodology for identifying temporal trends in a corpus of medieval charters. We have used perplexities derived from RNNs as a distance measure between documents and then, performed clustering on those distances. We argue that perplexities calculated by such language models are representative of temporal trends. The clusters produced using the K-Means algorithm give an insight of the differences in language in different time periods at least partly due to language change. We suggest that the temporal distribution of the individual clusters might provide a more nuanced picture of temporal trends compared to discrete bins, thus providing better results when used in a classification task."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="boldsen-etal-2019-identifying">
<titleInfo>
<title>Identifying Temporal Trends Based on Perplexity and Clustering: Are We Looking at Language Change?</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sidsel</namePart>
<namePart type="family">Boldsen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Manex</namePart>
<namePart type="family">Agirrezabal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Patrizia</namePart>
<namePart type="family">Paggio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st International Workshop on Computational Approaches to Historical Language Change</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nina</namePart>
<namePart type="family">Tahmasebi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lars</namePart>
<namePart type="family">Borin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Adam</namePart>
<namePart type="family">Jatowt</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Florence, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this work we propose a data-driven methodology for identifying temporal trends in a corpus of medieval charters. We have used perplexities derived from RNNs as a distance measure between documents and then, performed clustering on those distances. We argue that perplexities calculated by such language models are representative of temporal trends. The clusters produced using the K-Means algorithm give an insight of the differences in language in different time periods at least partly due to language change. We suggest that the temporal distribution of the individual clusters might provide a more nuanced picture of temporal trends compared to discrete bins, thus providing better results when used in a classification task.</abstract>
<identifier type="citekey">boldsen-etal-2019-identifying</identifier>
<identifier type="doi">10.18653/v1/W19-4711</identifier>
<location>
<url>https://aclanthology.org/W19-4711/</url>
</location>
<part>
<date>2019-08</date>
<extent unit="page">
<start>86</start>
<end>91</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Identifying Temporal Trends Based on Perplexity and Clustering: Are We Looking at Language Change?
%A Boldsen, Sidsel
%A Agirrezabal, Manex
%A Paggio, Patrizia
%Y Tahmasebi, Nina
%Y Borin, Lars
%Y Jatowt, Adam
%Y Xu, Yang
%S Proceedings of the 1st International Workshop on Computational Approaches to Historical Language Change
%D 2019
%8 August
%I Association for Computational Linguistics
%C Florence, Italy
%F boldsen-etal-2019-identifying
%X In this work we propose a data-driven methodology for identifying temporal trends in a corpus of medieval charters. We have used perplexities derived from RNNs as a distance measure between documents and then, performed clustering on those distances. We argue that perplexities calculated by such language models are representative of temporal trends. The clusters produced using the K-Means algorithm give an insight of the differences in language in different time periods at least partly due to language change. We suggest that the temporal distribution of the individual clusters might provide a more nuanced picture of temporal trends compared to discrete bins, thus providing better results when used in a classification task.
%R 10.18653/v1/W19-4711
%U https://aclanthology.org/W19-4711/
%U https://doi.org/10.18653/v1/W19-4711
%P 86-91
Markdown (Informal)
[Identifying Temporal Trends Based on Perplexity and Clustering: Are We Looking at Language Change?](https://aclanthology.org/W19-4711/) (Boldsen et al., LChange 2019)
ACL