% Workshop paper from the NLP+CSS 2022 proceedings (ACL Anthology ID 2022.nlpcss-1.13).
% The title braces the whole word Scandinavian so sentence-casing styles keep its capital.
% The address field holds the venue city exactly as given in the source record.
@inproceedings{touileb-nozza-2022-measuring,
  title     = {Measuring Harmful Representations in {Scandinavian} Language Models},
  author    = {Touileb, Samia and Nozza, Debora},
  editor    = {Bamman, David and
               Hovy, Dirk and
               Jurgens, David and
               Keith, Katherine and
               O'Connor, Brendan and
               Volkova, Svitlana},
  booktitle = {Proceedings of the Fifth Workshop on Natural Language Processing and Computational Social Science ({NLP+CSS})},
  month     = nov,
  year      = {2022},
  address   = {Abu Dhabi, UAE},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2022.nlpcss-1.13},
  doi       = {10.18653/v1/2022.nlpcss-1.13},
  pages     = {118--125},
  abstract  = {Scandinavian countries are perceived as role-models when it comes to gender equality. With the advent of pre-trained language models and their widespread usage, we investigate to what extent gender-based harmful and toxic content exists in selected Scandinavian language models. We examine nine models, covering Danish, Swedish, and Norwegian, by manually creating template-based sentences and probing the models for completion. We evaluate the completions using two methods for measuring harmful and toxic completions and provide a thorough analysis of the results. We show that Scandinavian pre-trained language models contain harmful and gender-based stereotypes with similar values across all languages. This finding goes against the general expectations related to gender equality in Scandinavian countries and shows the possible problematic outcomes of using such models in real-world settings. Warning: Some of the examples provided in this paper can be upsetting and offensive.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- One MODS record; its ID repeats the citekey identifier further down. -->
  <mods ID="touileb-nozza-2022-measuring">
    <titleInfo>
      <title>Measuring Harmful Representations in Scandinavian Language Models</title>
    </titleInfo>
    <!-- Authors of the paper, in listed order. -->
    <name type="personal">
      <namePart type="given">Samia</namePart>
      <namePart type="family">Touileb</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Debora</namePart>
      <namePart type="family">Nozza</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2022-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Fifth Workshop on Natural Language Processing and Computational Social Science (NLP+CSS)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">David</namePart>
        <namePart type="family">Bamman</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Dirk</namePart>
        <namePart type="family">Hovy</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">David</namePart>
        <namePart type="family">Jurgens</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Katherine</namePart>
        <namePart type="family">Keith</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Brendan</namePart>
        <namePart type="family">O’Connor</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Svitlana</namePart>
        <namePart type="family">Volkova</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Abu Dhabi, UAE</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Scandinavian countries are perceived as role-models when it comes to gender equality. With the advent of pre-trained language models and their widespread usage, we investigate to what extent gender-based harmful and toxic content exists in selected Scandinavian language models. We examine nine models, covering Danish, Swedish, and Norwegian, by manually creating template-based sentences and probing the models for completion. We evaluate the completions using two methods for measuring harmful and toxic completions and provide a thorough analysis of the results. We show that Scandinavian pre-trained language models contain harmful and gender-based stereotypes with similar values across all languages. This finding goes against the general expectations related to gender equality in Scandinavian countries and shows the possible problematic outcomes of using such models in real-world settings. Warning: Some of the examples provided in this paper can be upsetting and offensive.</abstract>
    <!-- Identifiers and landing URL for the paper. -->
    <identifier type="citekey">touileb-nozza-2022-measuring</identifier>
    <identifier type="doi">10.18653/v1/2022.nlpcss-1.13</identifier>
    <location>
      <url>https://aclanthology.org/2022.nlpcss-1.13</url>
    </location>
    <!-- Issue date and page extent of the paper. -->
    <part>
      <date>2022-11</date>
      <extent unit="page">
        <start>118</start>
        <end>125</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Measuring Harmful Representations in Scandinavian Language Models
%A Touileb, Samia
%A Nozza, Debora
%Y Bamman, David
%Y Hovy, Dirk
%Y Jurgens, David
%Y Keith, Katherine
%Y O’Connor, Brendan
%Y Volkova, Svitlana
%S Proceedings of the Fifth Workshop on Natural Language Processing and Computational Social Science (NLP+CSS)
%D 2022
%8 November
%I Association for Computational Linguistics
%C Abu Dhabi, UAE
%F touileb-nozza-2022-measuring
%X Scandinavian countries are perceived as role-models when it comes to gender equality. With the advent of pre-trained language models and their widespread usage, we investigate to what extent gender-based harmful and toxic content exists in selected Scandinavian language models. We examine nine models, covering Danish, Swedish, and Norwegian, by manually creating template-based sentences and probing the models for completion. We evaluate the completions using two methods for measuring harmful and toxic completions and provide a thorough analysis of the results. We show that Scandinavian pre-trained language models contain harmful and gender-based stereotypes with similar values across all languages. This finding goes against the general expectations related to gender equality in Scandinavian countries and shows the possible problematic outcomes of using such models in real-world settings. Warning: Some of the examples provided in this paper can be upsetting and offensive.
%R 10.18653/v1/2022.nlpcss-1.13
%U https://aclanthology.org/2022.nlpcss-1.13
%U https://doi.org/10.18653/v1/2022.nlpcss-1.13
%P 118-125
Markdown (Informal)
[Measuring Harmful Representations in Scandinavian Language Models](https://aclanthology.org/2022.nlpcss-1.13) (Touileb & Nozza, NLP+CSS 2022)
ACL