@inproceedings{mcgillivray-etal-2022-leveraging,
title = "Leveraging time-dependent lexical features for offensive language detection",
author = "McGillivray, Barbara and
Alahapperuma, Malithi and
Cook, Jonathan and
Di Bonaventura, Chiara and
Mero{\~n}o-Pe{\~n}uela, Albert and
Tyson, Gareth and
Wilson, Steven",
editor = "Barbieri, Francesco and
Camacho-Collados, Jose and
Dhingra, Bhuwan and
Espinosa-Anke, Luis and
Gribovskaya, Elena and
Lazaridou, Angeliki and
Loureiro, Daniel and
Neves, Leonardo",
booktitle = "Proceedings of the First Workshop on Ever Evolving NLP (EvoNLP)",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates (Hybrid)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.evonlp-1.7",
doi = "10.18653/v1/2022.evonlp-1.7",
pages = "39--54",
abstract = "We present a study on the integration of time-sensitive information in lexicon-based offensive language detection systems. Our focus is on Offenseval sub-task A, aimed at detecting offensive tweets. We apply a semantic change detection algorithm over a short time span of two years to detect words whose semantics has changed and we focus particularly on those words that acquired or lost an offensive meaning between 2019 and 2020. Using the output of this semantic change detection approach, we train an SVM classifier on the Offenseval 2019 training set. We build on the already competitive SINAI system submitted to Offenseval 2019 by adding new lexical features, including those that capture the change in usage of words and their association with emerging offensive usages. We discuss the challenges, opportunities and limitations of integrating semantic change detection in offensive language detection models. Our work draws attention to an often neglected aspect of offensive language, namely that the meanings of words are constantly evolving and that NLP systems that account for this change can achieve good performance even when not trained on the most recent training data.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mcgillivray-etal-2022-leveraging">
<titleInfo>
<title>Leveraging time-dependent lexical features for offensive language detection</title>
</titleInfo>
<name type="personal">
<namePart type="given">Barbara</namePart>
<namePart type="family">McGillivray</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Malithi</namePart>
<namePart type="family">Alahapperuma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jonathan</namePart>
<namePart type="family">Cook</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chiara</namePart>
<namePart type="family">Di Bonaventura</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Albert</namePart>
<namePart type="family">Meroño-Peñuela</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gareth</namePart>
<namePart type="family">Tyson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Wilson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Ever Evolving NLP (EvoNLP)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Francesco</namePart>
<namePart type="family">Barbieri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jose</namePart>
<namePart type="family">Camacho-Collados</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bhuwan</namePart>
<namePart type="family">Dhingra</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luis</namePart>
<namePart type="family">Espinosa-Anke</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elena</namePart>
<namePart type="family">Gribovskaya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Angeliki</namePart>
<namePart type="family">Lazaridou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Loureiro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leonardo</namePart>
<namePart type="family">Neves</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, United Arab Emirates (Hybrid)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present a study on the integration of time-sensitive information in lexicon-based offensive language detection systems. Our focus is on Offenseval sub-task A, aimed at detecting offensive tweets. We apply a semantic change detection algorithm over a short time span of two years to detect words whose semantics has changed and we focus particularly on those words that acquired or lost an offensive meaning between 2019 and 2020. Using the output of this semantic change detection approach, we train an SVM classifier on the Offenseval 2019 training set. We build on the already competitive SINAI system submitted to Offenseval 2019 by adding new lexical features, including those that capture the change in usage of words and their association with emerging offensive usages. We discuss the challenges, opportunities and limitations of integrating semantic change detection in offensive language detection models. Our work draws attention to an often neglected aspect of offensive language, namely that the meanings of words are constantly evolving and that NLP systems that account for this change can achieve good performance even when not trained on the most recent training data.</abstract>
<identifier type="citekey">mcgillivray-etal-2022-leveraging</identifier>
<identifier type="doi">10.18653/v1/2022.evonlp-1.7</identifier>
<location>
<url>https://aclanthology.org/2022.evonlp-1.7</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>39</start>
<end>54</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Leveraging time-dependent lexical features for offensive language detection
%A McGillivray, Barbara
%A Alahapperuma, Malithi
%A Cook, Jonathan
%A Di Bonaventura, Chiara
%A Meroño-Peñuela, Albert
%A Tyson, Gareth
%A Wilson, Steven
%Y Barbieri, Francesco
%Y Camacho-Collados, Jose
%Y Dhingra, Bhuwan
%Y Espinosa-Anke, Luis
%Y Gribovskaya, Elena
%Y Lazaridou, Angeliki
%Y Loureiro, Daniel
%Y Neves, Leonardo
%S Proceedings of the First Workshop on Ever Evolving NLP (EvoNLP)
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates (Hybrid)
%F mcgillivray-etal-2022-leveraging
%X We present a study on the integration of time-sensitive information in lexicon-based offensive language detection systems. Our focus is on Offenseval sub-task A, aimed at detecting offensive tweets. We apply a semantic change detection algorithm over a short time span of two years to detect words whose semantics has changed and we focus particularly on those words that acquired or lost an offensive meaning between 2019 and 2020. Using the output of this semantic change detection approach, we train an SVM classifier on the Offenseval 2019 training set. We build on the already competitive SINAI system submitted to Offenseval 2019 by adding new lexical features, including those that capture the change in usage of words and their association with emerging offensive usages. We discuss the challenges, opportunities and limitations of integrating semantic change detection in offensive language detection models. Our work draws attention to an often neglected aspect of offensive language, namely that the meanings of words are constantly evolving and that NLP systems that account for this change can achieve good performance even when not trained on the most recent training data.
%R 10.18653/v1/2022.evonlp-1.7
%U https://aclanthology.org/2022.evonlp-1.7
%U https://doi.org/10.18653/v1/2022.evonlp-1.7
%P 39-54
Markdown (Informal)
[Leveraging time-dependent lexical features for offensive language detection](https://aclanthology.org/2022.evonlp-1.7) (McGillivray et al., EvoNLP 2022)
ACL
- Barbara McGillivray, Malithi Alahapperuma, Jonathan Cook, Chiara Di Bonaventura, Albert Meroño-Peñuela, Gareth Tyson, and Steven Wilson. 2022. Leveraging time-dependent lexical features for offensive language detection. In Proceedings of the First Workshop on Ever Evolving NLP (EvoNLP), pages 39–54, Abu Dhabi, United Arab Emirates (Hybrid). Association for Computational Linguistics.