% Workshop paper record. Case-sensitive words are brace-protected as whole
% words so sentence-casing styles keep them intact.
% NOTE(review): `address` looks like the workshop venue rather than the
% publisher's city -- confirm before relying on it as a publisher location.
@inproceedings{bunzeck-zarriess-2024-slayqa,
    author    = {Bunzeck, Bastian and
                 Zarrie{\ss}, Sina},
    title     = {The {SlayQA} benchmark of social reasoning: testing gender-inclusive generalization with neopronouns},
    editor    = {Hupkes, Dieuwke and
                 Dankers, Verna and
                 Batsuren, Khuyagbaatar and
                 Kazemnejad, Amirhossein and
                 Christodoulopoulos, Christos and
                 Giulianelli, Mario and
                 Cotterell, Ryan},
    booktitle = {Proceedings of the 2nd {GenBench} Workshop on Generalisation (Benchmarking) in {NLP}},
    year      = {2024},
    month     = nov,
    pages     = {42--53},
    publisher = {Association for Computational Linguistics},
    address   = {Miami, Florida, USA},
    url       = {https://aclanthology.org/2024.genbench-1.3},
    abstract  = {We introduce SlayQA, a novel benchmark data set designed to evaluate language models' ability to handle gender-inclusive language, specifically the use of neopronouns, in a question-answering setting. Derived from the Social IQa data set, SlayQA modifies context-question-answer triples to include gender-neutral pronouns, creating a significant linguistic distribution shift in comparison to common pre-training corpora like C4 or Dolma. Our results show that state-of-the-art language models struggle with the challenge, exhibiting small, but noticeable performance drops when answering question containing neopronouns compared to those without.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the paper with citekey bunzeck-zarriess-2024-slayqa. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="bunzeck-zarriess-2024-slayqa">
    <!-- Paper title -->
    <titleInfo>
      <title>The SlayQA benchmark of social reasoning: testing gender-inclusive generalization with neopronouns</title>
    </titleInfo>
    <!-- Authors, in listed order -->
    <name type="personal">
      <namePart type="given">Bastian</namePart>
      <namePart type="family">Bunzeck</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sina</namePart>
      <namePart type="family">Zarrieß</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors and publisher -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2nd GenBench Workshop on Generalisation (Benchmarking) in NLP</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Dieuwke</namePart>
        <namePart type="family">Hupkes</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Verna</namePart>
        <namePart type="family">Dankers</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Khuyagbaatar</namePart>
        <namePart type="family">Batsuren</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Amirhossein</namePart>
        <namePart type="family">Kazemnejad</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Christos</namePart>
        <namePart type="family">Christodoulopoulos</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mario</namePart>
        <namePart type="family">Giulianelli</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ryan</namePart>
        <namePart type="family">Cotterell</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Miami, Florida, USA</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>We introduce SlayQA, a novel benchmark data set designed to evaluate language models’ ability to handle gender-inclusive language, specifically the use of neopronouns, in a question-answering setting. Derived from the Social IQa data set, SlayQA modifies context-question-answer triples to include gender-neutral pronouns, creating a significant linguistic distribution shift in comparison to common pre-training corpora like C4 or Dolma. Our results show that state-of-the-art language models struggle with the challenge, exhibiting small, but noticeable performance drops when answering question containing neopronouns compared to those without.</abstract>
    <!-- Identifiers and location -->
    <identifier type="citekey">bunzeck-zarriess-2024-slayqa</identifier>
    <location>
      <url>https://aclanthology.org/2024.genbench-1.3</url>
    </location>
    <!-- Issue date and page extent -->
    <part>
      <date>2024-11</date>
      <extent unit="page">
        <start>42</start>
        <end>53</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T The SlayQA benchmark of social reasoning: testing gender-inclusive generalization with neopronouns
%A Bunzeck, Bastian
%A Zarrieß, Sina
%Y Hupkes, Dieuwke
%Y Dankers, Verna
%Y Batsuren, Khuyagbaatar
%Y Kazemnejad, Amirhossein
%Y Christodoulopoulos, Christos
%Y Giulianelli, Mario
%Y Cotterell, Ryan
%S Proceedings of the 2nd GenBench Workshop on Generalisation (Benchmarking) in NLP
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F bunzeck-zarriess-2024-slayqa
%X We introduce SlayQA, a novel benchmark data set designed to evaluate language models’ ability to handle gender-inclusive language, specifically the use of neopronouns, in a question-answering setting. Derived from the Social IQa data set, SlayQA modifies context-question-answer triples to include gender-neutral pronouns, creating a significant linguistic distribution shift in comparison to common pre-training corpora like C4 or Dolma. Our results show that state-of-the-art language models struggle with the challenge, exhibiting small, but noticeable performance drops when answering question containing neopronouns compared to those without.
%U https://aclanthology.org/2024.genbench-1.3
%P 42-53
Markdown (Informal)
[The SlayQA benchmark of social reasoning: testing gender-inclusive generalization with neopronouns](https://aclanthology.org/2024.genbench-1.3) (Bunzeck & Zarrieß, GenBench 2024)
ACL