% Conference paper entry. Fields are grouped as: authorship and title,
% host volume, publication details, identifiers, abstract.
@inproceedings{hoeken-etal-2023-methodological,
  author    = {Hoeken, Sanne and Alacam, {\"O}zge and Fokkens, Antske and Sommerauer, Pia},
  title     = {Methodological Insights in Detecting Subtle Semantic Shifts with Contextualized and Static Language Models},
  editor    = {Bouamor, Houda and Pino, Juan and Bali, Kalika},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2023},
  publisher = {Association for Computational Linguistics},
  address   = {Singapore},
  month     = dec,
  year      = {2023},
  pages     = {3662--3675},
  doi       = {10.18653/v1/2023.findings-emnlp.237},
  url       = {https://aclanthology.org/2023.findings-emnlp.237/},
  abstract  = {In this paper, we investigate automatic detection of subtle semantic shifts between social communities of different political convictions in Dutch and English. We perform a methodological study comparing methods using static and contextualized language models. We investigate the impact of specializing contextualized models through fine-tuning on target corpora, word sense disambiguation and sentiment. We furthermore propose a new approach using masked token prediction, that relies on behavioral information, specifically the most probable substitutions, instead of geometrical comparison of representations. Our results show that methods using static models and our masked token prediction method can detect differences in connotation of politically loaded terms, whereas methods that rely on measuring the distance between contextualized representations are not providing clear signals, even in synthetic scenarios of extreme shifts.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record for one conference paper. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="hoeken-etal-2023-methodological">
    <!-- Title of the paper itself. -->
    <titleInfo>
      <title>Methodological Insights in Detecting Subtle Semantic Shifts with Contextualized and Static Language Models</title>
    </titleInfo>
    <!-- Authors, one name element each, in listed order. -->
    <name type="personal">
      <namePart type="given">Sanne</namePart>
      <namePart type="family">Hoeken</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Özge</namePart>
      <namePart type="family">Alacam</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Antske</namePart>
      <namePart type="family">Fokkens</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Pia</namePart>
      <namePart type="family">Sommerauer</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2023-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the volume this paper appears in, with its editors and publisher. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: EMNLP 2023</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Houda</namePart>
        <namePart type="family">Bouamor</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Juan</namePart>
        <namePart type="family">Pino</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Kalika</namePart>
        <namePart type="family">Bali</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Singapore</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>In this paper, we investigate automatic detection of subtle semantic shifts between social communities of different political convictions in Dutch and English. We perform a methodological study comparing methods using static and contextualized language models. We investigate the impact of specializing contextualized models through fine-tuning on target corpora, word sense disambiguation and sentiment. We furthermore propose a new approach using masked token prediction, that relies on behavioral information, specifically the most probable substitutions, instead of geometrical comparison of representations. Our results show that methods using static models and our masked token prediction method can detect differences in connotation of politically loaded terms, whereas methods that rely on measuring the distance between contextualized representations are not providing clear signals, even in synthetic scenarios of extreme shifts.</abstract>
    <!-- Identifiers: the citekey repeats the mods ID attribute above. -->
    <identifier type="citekey">hoeken-etal-2023-methodological</identifier>
    <identifier type="doi">10.18653/v1/2023.findings-emnlp.237</identifier>
    <location>
      <url>https://aclanthology.org/2023.findings-emnlp.237/</url>
    </location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2023-12</date>
      <extent unit="page">
        <start>3662</start>
        <end>3675</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Methodological Insights in Detecting Subtle Semantic Shifts with Contextualized and Static Language Models
%A Hoeken, Sanne
%A Alacam, Özge
%A Fokkens, Antske
%A Sommerauer, Pia
%Y Bouamor, Houda
%Y Pino, Juan
%Y Bali, Kalika
%S Findings of the Association for Computational Linguistics: EMNLP 2023
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F hoeken-etal-2023-methodological
%X In this paper, we investigate automatic detection of subtle semantic shifts between social communities of different political convictions in Dutch and English. We perform a methodological study comparing methods using static and contextualized language models. We investigate the impact of specializing contextualized models through fine-tuning on target corpora, word sense disambiguation and sentiment. We furthermore propose a new approach using masked token prediction, that relies on behavioral information, specifically the most probable substitutions, instead of geometrical comparison of representations. Our results show that methods using static models and our masked token prediction method can detect differences in connotation of politically loaded terms, whereas methods that rely on measuring the distance between contextualized representations are not providing clear signals, even in synthetic scenarios of extreme shifts.
%R 10.18653/v1/2023.findings-emnlp.237
%U https://aclanthology.org/2023.findings-emnlp.237/
%U https://doi.org/10.18653/v1/2023.findings-emnlp.237
%P 3662-3675
Markdown (Informal)
[Methodological Insights in Detecting Subtle Semantic Shifts with Contextualized and Static Language Models](https://aclanthology.org/2023.findings-emnlp.237/) (Hoeken et al., Findings 2023)
ACL