@inproceedings{aida-bollegala-2023-swap,
title = "Swap and Predict {--} Predicting the Semantic Changes in Words across Corpora by Context Swapping",
author = "Aida, Taichi and
Bollegala, Danushka",
editor = "Bouamor, Houda and
Pino, Juan and
Bali, Kalika",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2023",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.findings-emnlp.520",
doi = "10.18653/v1/2023.findings-emnlp.520",
pages = "7753--7772",
abstract = "Meanings of words change over time and across domains. Detecting the semantic changes of words is an important task for various NLP applications that must make time-sensitive predictions. We consider the problem of predicting whether a given target word, $w$, changes its meaning between two different text corpora, $\mathcal{C}_1$ and $\mathcal{C}_2$. For this purpose, we propose $\textit{Swapping-based Semantic Change Detection}$ (SSCD), an unsupervised method that randomly swaps contexts between $\mathcal{C}_1$ and $\mathcal{C}_2$ where $w$ occurs. We then look at the distribution of contextualised word embeddings of $w$, obtained from a pretrained masked language model (MLM), representing the meaning of $w$ in its occurrence contexts in $\mathcal{C}_1$ and $\mathcal{C}_2$. Intuitively, if the meaning of $w$ does not change between $\mathcal{C}_1$ and $\mathcal{C}_2$, we would expect the distributions of contextualised word embeddings of $w$ to remain the same before and after this random swapping process. Despite its simplicity, we demonstrate that even by using pretrained MLMs without any fine-tuning, our proposed context swapping method accurately predicts the semantic changes of words in four languages (English, German, Swedish, and Latin) and across different time spans (over 50 years and about five years). Moreover, our method achieves significant performance improvements compared to strong baselines for the English semantic change prediction task. Source code is available at https://github.com/a1da4/svp-swap .",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="aida-bollegala-2023-swap">
<titleInfo>
<title>Swap and Predict – Predicting the Semantic Changes in Words across Corpora by Context Swapping</title>
</titleInfo>
<name type="personal">
<namePart type="given">Taichi</namePart>
<namePart type="family">Aida</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Danushka</namePart>
<namePart type="family">Bollegala</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2023</title>
</titleInfo>
<name type="personal">
<namePart type="given">Houda</namePart>
<namePart type="family">Bouamor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juan</namePart>
<namePart type="family">Pino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kalika</namePart>
<namePart type="family">Bali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Meanings of words change over time and across domains. Detecting the semantic changes of words is an important task for various NLP applications that must make time-sensitive predictions. We consider the problem of predicting whether a given target word, w, changes its meaning between two different text corpora, \mathcalC₁ and \mathcalC₂. For this purpose, we propose Swapping-based Semantic Change Detection (SSCD), an unsupervised method that randomly swaps contexts between \mathcalC₁ and \mathcalC₂ where w occurs. We then look at the distribution of contextualised word embeddings of w, obtained from a pretrained masked language model (MLM), representing the meaning of w in its occurrence contexts in \mathcalC₁ and \mathcalC₂. Intuitively, if the meaning of w does not change between \mathcalC₁ and \mathcalC₂, we would expect the distributions of contextualised word embeddings of w to remain the same before and after this random swapping process. Despite its simplicity, we demonstrate that even by using pretrained MLMs without any fine-tuning, our proposed context swapping method accurately predicts the semantic changes of words in four languages (English, German, Swedish, and Latin) and across different time spans (over 50 years and about five years). Moreover, our method achieves significant performance improvements compared to strong baselines for the English semantic change prediction task. Source code is available at https://github.com/a1da4/svp-swap .</abstract>
<identifier type="citekey">aida-bollegala-2023-swap</identifier>
<identifier type="doi">10.18653/v1/2023.findings-emnlp.520</identifier>
<location>
<url>https://aclanthology.org/2023.findings-emnlp.520</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>7753</start>
<end>7772</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Swap and Predict – Predicting the Semantic Changes in Words across Corpora by Context Swapping
%A Aida, Taichi
%A Bollegala, Danushka
%Y Bouamor, Houda
%Y Pino, Juan
%Y Bali, Kalika
%S Findings of the Association for Computational Linguistics: EMNLP 2023
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F aida-bollegala-2023-swap
%X Meanings of words change over time and across domains. Detecting the semantic changes of words is an important task for various NLP applications that must make time-sensitive predictions. We consider the problem of predicting whether a given target word, w, changes its meaning between two different text corpora, \mathcalC₁ and \mathcalC₂. For this purpose, we propose Swapping-based Semantic Change Detection (SSCD), an unsupervised method that randomly swaps contexts between \mathcalC₁ and \mathcalC₂ where w occurs. We then look at the distribution of contextualised word embeddings of w, obtained from a pretrained masked language model (MLM), representing the meaning of w in its occurrence contexts in \mathcalC₁ and \mathcalC₂. Intuitively, if the meaning of w does not change between \mathcalC₁ and \mathcalC₂, we would expect the distributions of contextualised word embeddings of w to remain the same before and after this random swapping process. Despite its simplicity, we demonstrate that even by using pretrained MLMs without any fine-tuning, our proposed context swapping method accurately predicts the semantic changes of words in four languages (English, German, Swedish, and Latin) and across different time spans (over 50 years and about five years). Moreover, our method achieves significant performance improvements compared to strong baselines for the English semantic change prediction task. Source code is available at https://github.com/a1da4/svp-swap .
%R 10.18653/v1/2023.findings-emnlp.520
%U https://aclanthology.org/2023.findings-emnlp.520
%U https://doi.org/10.18653/v1/2023.findings-emnlp.520
%P 7753-7772
Markdown (Informal)
[Swap and Predict – Predicting the Semantic Changes in Words across Corpora by Context Swapping](https://aclanthology.org/2023.findings-emnlp.520) (Aida & Bollegala, Findings 2023)
ACL