@inproceedings{mina-etal-2024-exploring,
title = "Exploring the Relationship Between Intrinsic Stigma in Masked Language Models and Training Data Using the Stereotype Content Model",
author = "Mina, Mario and
Falc{\~a}o, J{\'u}lia and
Gonzalez-Agirre, Aitor",
editor = "Kokkinakis, Dimitrios and
Fraser, Kathleen C. and
Themistocleous, Charalambos K. and
Fors, Kristina Lundholm and
Tsanas, Athanasios and
Ohman, Fredrik",
booktitle = "Proceedings of the Fifth Workshop on Resources and ProcessIng of linguistic, para-linguistic and extra-linguistic Data from people with various forms of cognitive/psychiatric/developmental impairments @LREC-COLING 2024",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.rapid-1.7",
pages = "54--67",
abstract = "Much work has gone into developing language models of increasing size, but only recently have we begun to examine them for pernicious behaviour that could lead to harming marginalised groups. Following Lin et al. (2022) in rooting our work in psychological research, we prompt two masked language models (MLMs) of different specialisations in English and Spanish with statements from a questionnaire developed to measure stigma to determine if they treat physical and mental illnesses equally. In both models we find a statistically significant difference in the treatment of physical and mental illnesses across most if not all latent constructs as measured by the questionnaire, and thus they are more likely to associate mental illnesses with stigma. We then examine their training data or data retrieved from the same domain using a computational implementation of the Stereotype Content Model (SCM) (Fiske et al., 2002; Fraser et al., 2021) to interpret the questionnaire results based on the SCM values as reflected in the data. We observe that model behaviour can largely be explained by the distribution of the mentions of illnesses according to their SCM values.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mina-etal-2024-exploring">
<titleInfo>
<title>Exploring the Relationship Between Intrinsic Stigma in Masked Language Models and Training Data Using the Stereotype Content Model</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mario</namePart>
<namePart type="family">Mina</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Júlia</namePart>
<namePart type="family">Falcão</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aitor</namePart>
<namePart type="family">Gonzalez-Agirre</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth Workshop on Resources and ProcessIng of linguistic, para-linguistic and extra-linguistic Data from people with various forms of cognitive/psychiatric/developmental impairments @LREC-COLING 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dimitrios</namePart>
<namePart type="family">Kokkinakis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kathleen</namePart>
<namePart type="given">C</namePart>
<namePart type="family">Fraser</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Charalambos</namePart>
<namePart type="given">K</namePart>
<namePart type="family">Themistocleous</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kristina</namePart>
<namePart type="given">Lundholm</namePart>
<namePart type="family">Fors</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Athanasios</namePart>
<namePart type="family">Tsanas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fredrik</namePart>
<namePart type="family">Ohman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Much work has gone into developing language models of increasing size, but only recently have we begun to examine them for pernicious behaviour that could lead to harming marginalised groups. Following Lin et al. (2022) in rooting our work in psychological research, we prompt two masked language models (MLMs) of different specialisations in English and Spanish with statements from a questionnaire developed to measure stigma to determine if they treat physical and mental illnesses equally. In both models we find a statistically significant difference in the treatment of physical and mental illnesses across most if not all latent constructs as measured by the questionnaire, and thus they are more likely to associate mental illnesses with stigma. We then examine their training data or data retrieved from the same domain using a computational implementation of the Stereotype Content Model (SCM) (Fiske et al., 2002; Fraser et al., 2021) to interpret the questionnaire results based on the SCM values as reflected in the data. We observe that model behaviour can largely be explained by the distribution of the mentions of illnesses according to their SCM values.</abstract>
<identifier type="citekey">mina-etal-2024-exploring</identifier>
<location>
<url>https://aclanthology.org/2024.rapid-1.7</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>54</start>
<end>67</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Exploring the Relationship Between Intrinsic Stigma in Masked Language Models and Training Data Using the Stereotype Content Model
%A Mina, Mario
%A Falcão, Júlia
%A Gonzalez-Agirre, Aitor
%Y Kokkinakis, Dimitrios
%Y Fraser, Kathleen C.
%Y Themistocleous, Charalambos K.
%Y Fors, Kristina Lundholm
%Y Tsanas, Athanasios
%Y Ohman, Fredrik
%S Proceedings of the Fifth Workshop on Resources and ProcessIng of linguistic, para-linguistic and extra-linguistic Data from people with various forms of cognitive/psychiatric/developmental impairments @LREC-COLING 2024
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F mina-etal-2024-exploring
%X Much work has gone into developing language models of increasing size, but only recently have we begun to examine them for pernicious behaviour that could lead to harming marginalised groups. Following Lin et al. (2022) in rooting our work in psychological research, we prompt two masked language models (MLMs) of different specialisations in English and Spanish with statements from a questionnaire developed to measure stigma to determine if they treat physical and mental illnesses equally. In both models we find a statistically significant difference in the treatment of physical and mental illnesses across most if not all latent constructs as measured by the questionnaire, and thus they are more likely to associate mental illnesses with stigma. We then examine their training data or data retrieved from the same domain using a computational implementation of the Stereotype Content Model (SCM) (Fiske et al., 2002; Fraser et al., 2021) to interpret the questionnaire results based on the SCM values as reflected in the data. We observe that model behaviour can largely be explained by the distribution of the mentions of illnesses according to their SCM values.
%U https://aclanthology.org/2024.rapid-1.7
%P 54-67
Markdown (Informal)
[Exploring the Relationship Between Intrinsic Stigma in Masked Language Models and Training Data Using the Stereotype Content Model](https://aclanthology.org/2024.rapid-1.7) (Mina et al., RaPID-WS 2024)
ACL