% Conference paper record, citation key dukic-etal-2025-characterizing.
% Accented letters in names are written as brace-wrapped LaTeX escapes.
% Proper nouns and acronyms in title/booktitle are braced as whole words so
% that sentence-casing bibliography styles keep their capitals.
% NOTE(review): the address field holds "Vienna, Austria", presumably the
% workshop location rather than the publisher's city; confirm before moving it.
@inproceedings{dukic-etal-2025-characterizing,
  title     = {Characterizing Linguistic Shifts in {Croatian} News via Diachronic Word Embeddings},
  author    = {Duki{\'c}, David and
               Bari{\'c}, Ana and
               {\v{C}}uljak, Marko and
               Juki{\'c}, Josip and
               Tutek, Martin},
  editor    = {Piskorski, Jakub and
               P{\v{r}}ib{\'a}{\v{n}}, Pavel and
               Nakov, Preslav and
               Yangarber, Roman and
               Marcinczuk, Michal},
  booktitle = {Proceedings of the 10th Workshop on {Slavic} Natural Language Processing ({Slavic} {NLP} 2025)},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.bsnlp-1.13/},
  doi       = {10.18653/v1/2025.bsnlp-1.13},
  pages     = {108--115},
  isbn      = {978-1-959429-57-9},
  abstract  = {Measuring how semantics of words change over time improves our understanding of how cultures and perspectives change. Diachronic word embeddings help us quantify this shift, although previous studies leveraged substantial temporally annotated corpora. In this work, we use a corpus of 9.5 million Croatian news articles spanning the past 25 years and quantify semantic change using skip-gram word embeddings trained on five-year periods. Our analysis finds that word embeddings capture linguistic shifts of terms pertaining to major topics in this timespan (COVID-19, Croatia joining the European Union, technological advancements). We also find evidence that embeddings from post-2020 encode increased positivity in sentiment analysis tasks, contrasting studies reporting a decline in mental health over the same period.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<!-- MODS v3 collection holding a single bibliographic record. -->
<!-- The record ID below repeats the citekey identifier given near the end of the record. -->
<mods ID="dukic-etal-2025-characterizing">
<!-- Title of the paper itself (the host volume's title is inside relatedItem). -->
<titleInfo>
<title>Characterizing Linguistic Shifts in Croatian News via Diachronic Word Embeddings</title>
</titleInfo>
<!-- Authors of the paper: one personal name element each, all with the role "author". -->
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Dukić</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ana</namePart>
<namePart type="family">Barić</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marko</namePart>
<namePart type="family">Čuljak</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Josip</namePart>
<namePart type="family">Jukić</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Martin</namePart>
<namePart type="family">Tutek</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<!-- Issue date of the paper, given to year and month. -->
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<!-- Host item: the proceedings volume containing the paper, with its title, editors, publisher, place, genre and ISBN. -->
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 10th Workshop on Slavic Natural Language Processing (Slavic NLP 2025)</title>
</titleInfo>
<!-- Editors of the host volume: one personal name element each, all with the role "editor". -->
<name type="personal">
<namePart type="given">Jakub</namePart>
<namePart type="family">Piskorski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pavel</namePart>
<namePart type="family">Přibáň</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Preslav</namePart>
<namePart type="family">Nakov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Roman</namePart>
<namePart type="family">Yangarber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michal</namePart>
<namePart type="family">Marcinczuk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">978-1-959429-57-9</identifier>
</relatedItem>
<abstract>Measuring how semantics of words change over time improves our understanding of how cultures and perspectives change. Diachronic word embeddings help us quantify this shift, although previous studies leveraged substantial temporally annotated corpora. In this work, we use a corpus of 9.5 million Croatian news articles spanning the past 25 years and quantify semantic change using skip-gram word embeddings trained on five-year periods. Our analysis finds that word embeddings capture linguistic shifts of terms pertaining to major topics in this timespan (COVID-19, Croatia joining the European Union, technological advancements). We also find evidence that embeddings from post-2020 encode increased positivity in sentiment analysis tasks, contrasting studies reporting a decline in mental health over the same period.</abstract>
<!-- Identifiers for the paper itself: citation key, DOI, and URL. -->
<identifier type="citekey">dukic-etal-2025-characterizing</identifier>
<identifier type="doi">10.18653/v1/2025.bsnlp-1.13</identifier>
<location>
<url>https://aclanthology.org/2025.bsnlp-1.13/</url>
</location>
<!-- Part details: date and page extent, start page 108 to end page 115. -->
<part>
<date>2025-07</date>
<extent unit="page">
<start>108</start>
<end>115</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Characterizing Linguistic Shifts in Croatian News via Diachronic Word Embeddings
%A Dukić, David
%A Barić, Ana
%A Čuljak, Marko
%A Jukić, Josip
%A Tutek, Martin
%Y Piskorski, Jakub
%Y Přibáň, Pavel
%Y Nakov, Preslav
%Y Yangarber, Roman
%Y Marcinczuk, Michal
%S Proceedings of the 10th Workshop on Slavic Natural Language Processing (Slavic NLP 2025)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 978-1-959429-57-9
%F dukic-etal-2025-characterizing
%X Measuring how semantics of words change over time improves our understanding of how cultures and perspectives change. Diachronic word embeddings help us quantify this shift, although previous studies leveraged substantial temporally annotated corpora. In this work, we use a corpus of 9.5 million Croatian news articles spanning the past 25 years and quantify semantic change using skip-gram word embeddings trained on five-year periods. Our analysis finds that word embeddings capture linguistic shifts of terms pertaining to major topics in this timespan (COVID-19, Croatia joining the European Union, technological advancements). We also find evidence that embeddings from post-2020 encode increased positivity in sentiment analysis tasks, contrasting studies reporting a decline in mental health over the same period.
%R 10.18653/v1/2025.bsnlp-1.13
%U https://aclanthology.org/2025.bsnlp-1.13/
%U https://doi.org/10.18653/v1/2025.bsnlp-1.13
%P 108-115
Markdown (Informal)
[Characterizing Linguistic Shifts in Croatian News via Diachronic Word Embeddings](https://aclanthology.org/2025.bsnlp-1.13/) (Dukić et al., BSNLP 2025)
ACL