@inproceedings{homan-etal-2024-intersectionality,
title = "Intersectionality in {AI} Safety: Using Multilevel Models to Understand Diverse Perceptions of Safety in Conversational {AI}",
author = "Homan, Christopher and
Serapio-Garcia, Gregory and
Aroyo, Lora and
Diaz, Mark and
Parrish, Alicia and
Prabhakaran, Vinodkumar and
Taylor, Alex and
Wang, Ding",
editor = "Abercrombie, Gavin and
Basile, Valerio and
Bernadi, Davide and
Dudy, Shiran and
Frenda, Simona and
Havens, Lucy and
Tonelli, Sara",
booktitle = "Proceedings of the 3rd Workshop on Perspectivist Approaches to NLP (NLPerspectives) @ LREC-COLING 2024",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.nlperspectives-1.15",
pages = "131--141",
abstract = "State-of-the-art conversational AI exhibits a level of sophistication that promises to have profound impacts on many aspects of daily life, including how people seek information, create content, and find emotional support. It has also shown a propensity for bias, offensive language, and false information. Consequently, understanding and moderating safety risks posed by interacting with AI chatbots is a critical technical and social challenge. Safety annotation is an intrinsically subjective task, where many factors{---}often intersecting{---}determine why people may express different opinions on whether a conversation is safe. We apply Bayesian multilevel models to surface factors that best predict rater behavior to a dataset of 101,286 annotations of conversations between humans and an AI chatbot, stratified by rater gender, age, race/ethnicity, and education level. We show that intersectional effects involving these factors play significant roles in validating safety in conversational AI data. For example, race/ethnicity and gender show strong intersectional effects, particularly among South Asian and East Asian women. We also find that conversational degree of harm impacts raters of all race/ethnicity groups, but that Indigenous and South Asian raters are particularly sensitive. Finally, we discover that the effect of education is uniquely intersectional for Indigenous raters. Our results underscore the utility of multilevel frameworks for uncovering underrepresented social perspectives.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="homan-etal-2024-intersectionality">
<titleInfo>
<title>Intersectionality in AI Safety: Using Multilevel Models to Understand Diverse Perceptions of Safety in Conversational AI</title>
</titleInfo>
<name type="personal">
<namePart type="given">Christopher</namePart>
<namePart type="family">Homan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gregory</namePart>
<namePart type="family">Serapio-Garcia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lora</namePart>
<namePart type="family">Aroyo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mark</namePart>
<namePart type="family">Diaz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alicia</namePart>
<namePart type="family">Parrish</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vinodkumar</namePart>
<namePart type="family">Prabhakaran</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alex</namePart>
<namePart type="family">Taylor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ding</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 3rd Workshop on Perspectivist Approaches to NLP (NLPerspectives) @ LREC-COLING 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Gavin</namePart>
<namePart type="family">Abercrombie</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Valerio</namePart>
<namePart type="family">Basile</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Davide</namePart>
<namePart type="family">Bernadi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shiran</namePart>
<namePart type="family">Dudy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Simona</namePart>
<namePart type="family">Frenda</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lucy</namePart>
<namePart type="family">Havens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Tonelli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>State-of-the-art conversational AI exhibits a level of sophistication that promises to have profound impacts on many aspects of daily life, including how people seek information, create content, and find emotional support. It has also shown a propensity for bias, offensive language, and false information. Consequently, understanding and moderating safety risks posed by interacting with AI chatbots is a critical technical and social challenge. Safety annotation is an intrinsically subjective task, where many factors—often intersecting—determine why people may express different opinions on whether a conversation is safe. We apply Bayesian multilevel models to surface factors that best predict rater behavior to a dataset of 101,286 annotations of conversations between humans and an AI chatbot, stratified by rater gender, age, race/ethnicity, and education level. We show that intersectional effects involving these factors play significant roles in validating safety in conversational AI data. For example, race/ethnicity and gender show strong intersectional effects, particularly among South Asian and East Asian women. We also find that conversational degree of harm impacts raters of all race/ethnicity groups, but that Indigenous and South Asian raters are particularly sensitive. Finally, we discover that the effect of education is uniquely intersectional for Indigenous raters. Our results underscore the utility of multilevel frameworks for uncovering underrepresented social perspectives.</abstract>
<identifier type="citekey">homan-etal-2024-intersectionality</identifier>
<location>
<url>https://aclanthology.org/2024.nlperspectives-1.15</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>131</start>
<end>141</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Intersectionality in AI Safety: Using Multilevel Models to Understand Diverse Perceptions of Safety in Conversational AI
%A Homan, Christopher
%A Serapio-Garcia, Gregory
%A Aroyo, Lora
%A Diaz, Mark
%A Parrish, Alicia
%A Prabhakaran, Vinodkumar
%A Taylor, Alex
%A Wang, Ding
%Y Abercrombie, Gavin
%Y Basile, Valerio
%Y Bernardi, Davide
%Y Dudy, Shiran
%Y Frenda, Simona
%Y Havens, Lucy
%Y Tonelli, Sara
%S Proceedings of the 3rd Workshop on Perspectivist Approaches to NLP (NLPerspectives) @ LREC-COLING 2024
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F homan-etal-2024-intersectionality
%X State-of-the-art conversational AI exhibits a level of sophistication that promises to have profound impacts on many aspects of daily life, including how people seek information, create content, and find emotional support. It has also shown a propensity for bias, offensive language, and false information. Consequently, understanding and moderating safety risks posed by interacting with AI chatbots is a critical technical and social challenge. Safety annotation is an intrinsically subjective task, where many factors—often intersecting—determine why people may express different opinions on whether a conversation is safe. We apply Bayesian multilevel models to surface factors that best predict rater behavior to a dataset of 101,286 annotations of conversations between humans and an AI chatbot, stratified by rater gender, age, race/ethnicity, and education level. We show that intersectional effects involving these factors play significant roles in validating safety in conversational AI data. For example, race/ethnicity and gender show strong intersectional effects, particularly among South Asian and East Asian women. We also find that conversational degree of harm impacts raters of all race/ethnicity groups, but that Indigenous and South Asian raters are particularly sensitive. Finally, we discover that the effect of education is uniquely intersectional for Indigenous raters. Our results underscore the utility of multilevel frameworks for uncovering underrepresented social perspectives.
%U https://aclanthology.org/2024.nlperspectives-1.15
%P 131-141
Markdown (Informal)
[Intersectionality in AI Safety: Using Multilevel Models to Understand Diverse Perceptions of Safety in Conversational AI](https://aclanthology.org/2024.nlperspectives-1.15) (Homan et al., NLPerspectives-WS 2024)
ACL
Christopher Homan, Gregory Serapio-Garcia, Lora Aroyo, Mark Diaz, Alicia Parrish, Vinodkumar Prabhakaran, Alex Taylor, and Ding Wang. 2024. Intersectionality in AI Safety: Using Multilevel Models to Understand Diverse Perceptions of Safety in Conversational AI. In Proceedings of the 3rd Workshop on Perspectivist Approaches to NLP (NLPerspectives) @ LREC-COLING 2024, pages 131–141, Torino, Italia. ELRA and ICCL.
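
Worked example (informal). The abstract above describes fitting Bayesian multilevel models to demographically stratified safety annotations. The sketch below is a hypothetical illustration of that kind of model, not the authors' code: it assumes the `bambi` library, an invented long-format table whose column names (`unsafe`, `degree_of_harm`, `rater_id`, and so on) are placeholders, and a simple varying-intercept, varying-slope structure over an intersectional race/ethnicity-by-gender grouping.

```python
# Hypothetical sketch of a Bayesian multilevel model over safety annotations.
# NOT the paper's actual specification; column names and structure are assumed.
import pandas as pd
import bambi as bmb
import arviz as az

# Toy long-format annotation table: one row per rater judgment.
# (The paper's dataset has 101,286 annotations; this is placeholder data.)
df = pd.DataFrame({
    "unsafe": [1, 0, 1, 0, 1, 0, 1, 1],              # binary safety judgment
    "degree_of_harm": [2, 0, 3, 1, 2, 0, 3, 2],      # conversation harm level
    "gender": ["woman", "man", "woman", "man"] * 2,
    "race_ethnicity": ["South Asian", "East Asian",
                       "Indigenous", "White"] * 2,
    "rater_id": ["r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8"],
})

# Encode the race/ethnicity-by-gender intersection as one grouping factor so
# both its baseline rate and its sensitivity to harm can vary by group.
df["demo_group"] = df["race_ethnicity"] + " / " + df["gender"]

model = bmb.Model(
    "unsafe ~ degree_of_harm"                  # population-level harm effect
    " + (1 | rater_id)"                        # per-rater baseline strictness
    " + (1 + degree_of_harm | demo_group)",    # group-varying intercept/slope
    df,
    family="bernoulli",
)
idata = model.fit(draws=1000, chains=4, random_seed=0)
print(az.summary(idata, var_names=["degree_of_harm"]))
```

On real data, comparing fits with and without the intersectional grouping term (for example with LOO cross-validation via `az.compare`) would be one way to ask, as the abstract does, which demographic factors best predict rater behavior.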