% Workshop paper, pages 3--10 of the proceedings volume named in `booktitle`.
% The abstract text carries three wording corrections:
% "and then clustering", "also discusses", "development of historian-oriented tools".
@inproceedings{pivovarova-etal-2019-word,
    title     = {Word Clustering for Historical Newspapers Analysis},
    author    = {Pivovarova, Lidia and
                 Zosa, Elaine and
                 Marjanen, Jani},
    editor    = {Vertan, Cristina and
                 Osenova, Petya and
                 Iliev, Dimitar},
    booktitle = {Proceedings of the Workshop on Language Technology for Digital Historical Archives},
    month     = sep,
    year      = {2019},
    address   = {Varna, Bulgaria},
    publisher = {INCOMA Ltd.},
    url       = {https://aclanthology.org/W19-9002},
    doi       = {10.26615/978-954-452-059-5_002},
    pages     = {3--10},
    abstract  = {This paper is a part of a collaboration between computer scientists and historians aimed at development of novel tools and methods to improve analysis of historical newspapers. We present a case study of ideological terms ending with -ism suffix in nineteenth century Finnish newspapers. We propose a two-step procedure to trace differences in word usages over time: training of diachronic embeddings on several time slices and then clustering embeddings of selected words together with their neighbours to obtain historical context. The obtained clusters turn out to be useful for historical studies. The paper also discusses specific difficulties related to development of historian-oriented tools.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record: a conference paper (three authors)
     whose host item is the proceedings volume (three editors, publisher, place). -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="pivovarova-etal-2019-word">
    <titleInfo>
      <title>Word Clustering for Historical Newspapers Analysis</title>
    </titleInfo>
    <!-- Authors of the paper, in listed order. -->
    <name type="personal">
      <namePart type="given">Lidia</namePart>
      <namePart type="family">Pivovarova</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Elaine</namePart>
      <namePart type="family">Zosa</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jani</namePart>
      <namePart type="family">Marjanen</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2019-09</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume this paper appears in. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Workshop on Language Technology for Digital Historical Archives</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Cristina</namePart>
        <namePart type="family">Vertan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Petya</namePart>
        <namePart type="family">Osenova</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Dimitar</namePart>
        <namePart type="family">Iliev</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>INCOMA Ltd.</publisher>
        <place>
          <placeTerm type="text">Varna, Bulgaria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <!-- Abstract wording corrected: "then clustering", "discusses", "development of". -->
    <abstract>This paper is a part of a collaboration between computer scientists and historians aimed at development of novel tools and methods to improve analysis of historical newspapers. We present a case study of ideological terms ending with -ism suffix in nineteenth century Finnish newspapers. We propose a two-step procedure to trace differences in word usages over time: training of diachronic embeddings on several time slices and then clustering embeddings of selected words together with their neighbours to obtain historical context. The obtained clusters turn out to be useful for historical studies. The paper also discusses specific difficulties related to development of historian-oriented tools.</abstract>
    <identifier type="citekey">pivovarova-etal-2019-word</identifier>
    <identifier type="doi">10.26615/978-954-452-059-5_002</identifier>
    <location>
      <url>https://aclanthology.org/W19-9002</url>
    </location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2019-09</date>
      <extent unit="page">
        <start>3</start>
        <end>10</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Word Clustering for Historical Newspapers Analysis
%A Pivovarova, Lidia
%A Zosa, Elaine
%A Marjanen, Jani
%Y Vertan, Cristina
%Y Osenova, Petya
%Y Iliev, Dimitar
%S Proceedings of the Workshop on Language Technology for Digital Historical Archives
%D 2019
%8 September
%I INCOMA Ltd.
%C Varna, Bulgaria
%F pivovarova-etal-2019-word
%X This paper is a part of a collaboration between computer scientists and historians aimed at development of novel tools and methods to improve analysis of historical newspapers. We present a case study of ideological terms ending with -ism suffix in nineteenth century Finnish newspapers. We propose a two-step procedure to trace differences in word usages over time: training of diachronic embeddings on several time slices and then clustering embeddings of selected words together with their neighbours to obtain historical context. The obtained clusters turn out to be useful for historical studies. The paper also discusses specific difficulties related to development of historian-oriented tools.
%R 10.26615/978-954-452-059-5_002
%U https://aclanthology.org/W19-9002
%U https://doi.org/10.26615/978-954-452-059-5_002
%P 3-10
Markdown (Informal)
[Word Clustering for Historical Newspapers Analysis](https://aclanthology.org/W19-9002) (Pivovarova et al., RANLP 2019)
ACL