@inproceedings{jawahar-seddah-2019-contextualized,
title = "Contextualized Diachronic Word Representations",
author = "Jawahar, Ganesh and
Seddah, Djam{\'e}",
editor = "Tahmasebi, Nina and
Borin, Lars and
Jatowt, Adam and
Xu, Yang",
booktitle = "Proceedings of the 1st International Workshop on Computational Approaches to Historical Language Change",
month = aug,
year = "2019",
address = "Florence, Italy",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W19-4705",
doi = "10.18653/v1/W19-4705",
pages = "35--47",
abstract = "Diachronic word embeddings play a key role in capturing interesting patterns about how language evolves over time. Most of the existing work focuses on studying corpora spanning across several decades, which is understandably still not a possibility when working on social media-based user-generated content. In this work, we address the problem of studying semantic changes in a large Twitter corpus collected over five years, a much shorter period than what is usually the norm in diachronic studies. We devise a novel attentional model, based on Bernoulli word embeddings, that are conditioned on contextual extra-linguistic (social) features such as network, spatial and socio-economic variables, which are associated with Twitter users, as well as topic-based features. We posit that these social features provide an inductive bias that helps our model to overcome the narrow time-span regime problem. Our extensive experiments reveal that our proposed model is able to capture subtle semantic shifts without being biased towards frequency cues and also works well when certain contextual features are absent. Our model fits the data better than current state-of-the-art dynamic word embedding models and therefore is a promising tool to study diachronic semantic changes over small time periods.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="jawahar-seddah-2019-contextualized">
<titleInfo>
<title>Contextualized Diachronic Word Representations</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ganesh</namePart>
<namePart type="family">Jawahar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Djamé</namePart>
<namePart type="family">Seddah</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st International Workshop on Computational Approaches to Historical Language Change</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nina</namePart>
<namePart type="family">Tahmasebi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lars</namePart>
<namePart type="family">Borin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Adam</namePart>
<namePart type="family">Jatowt</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Florence, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Diachronic word embeddings play a key role in capturing interesting patterns about how language evolves over time. Most of the existing work focuses on studying corpora spanning across several decades, which is understandably still not a possibility when working on social media-based user-generated content. In this work, we address the problem of studying semantic changes in a large Twitter corpus collected over five years, a much shorter period than what is usually the norm in diachronic studies. We devise a novel attentional model, based on Bernoulli word embeddings, that are conditioned on contextual extra-linguistic (social) features such as network, spatial and socio-economic variables, which are associated with Twitter users, as well as topic-based features. We posit that these social features provide an inductive bias that helps our model to overcome the narrow time-span regime problem. Our extensive experiments reveal that our proposed model is able to capture subtle semantic shifts without being biased towards frequency cues and also works well when certain contextual features are absent. Our model fits the data better than current state-of-the-art dynamic word embedding models and therefore is a promising tool to study diachronic semantic changes over small time periods.</abstract>
<identifier type="citekey">jawahar-seddah-2019-contextualized</identifier>
<identifier type="doi">10.18653/v1/W19-4705</identifier>
<location>
<url>https://aclanthology.org/W19-4705</url>
</location>
<part>
<date>2019-08</date>
<extent unit="page">
<start>35</start>
<end>47</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Contextualized Diachronic Word Representations
%A Jawahar, Ganesh
%A Seddah, Djamé
%Y Tahmasebi, Nina
%Y Borin, Lars
%Y Jatowt, Adam
%Y Xu, Yang
%S Proceedings of the 1st International Workshop on Computational Approaches to Historical Language Change
%D 2019
%8 August
%I Association for Computational Linguistics
%C Florence, Italy
%F jawahar-seddah-2019-contextualized
%X Diachronic word embeddings play a key role in capturing interesting patterns about how language evolves over time. Most of the existing work focuses on studying corpora spanning across several decades, which is understandably still not a possibility when working on social media-based user-generated content. In this work, we address the problem of studying semantic changes in a large Twitter corpus collected over five years, a much shorter period than what is usually the norm in diachronic studies. We devise a novel attentional model, based on Bernoulli word embeddings, that are conditioned on contextual extra-linguistic (social) features such as network, spatial and socio-economic variables, which are associated with Twitter users, as well as topic-based features. We posit that these social features provide an inductive bias that helps our model to overcome the narrow time-span regime problem. Our extensive experiments reveal that our proposed model is able to capture subtle semantic shifts without being biased towards frequency cues and also works well when certain contextual features are absent. Our model fits the data better than current state-of-the-art dynamic word embedding models and therefore is a promising tool to study diachronic semantic changes over small time periods.
%R 10.18653/v1/W19-4705
%U https://aclanthology.org/W19-4705
%U https://doi.org/10.18653/v1/W19-4705
%P 35-47
Markdown (Informal)
[Contextualized Diachronic Word Representations](https://aclanthology.org/W19-4705) (Jawahar & Seddah, LChange 2019)
ACL
- Ganesh Jawahar and Djamé Seddah. 2019. Contextualized Diachronic Word Representations. In Proceedings of the 1st International Workshop on Computational Approaches to Historical Language Change, pages 35–47, Florence, Italy. Association for Computational Linguistics.