% Findings of NAACL 2025 paper (ACL Anthology record 2025.findings-naacl.470).
% The abstract text has had missing inter-word spaces restored; numbers are kept as published.
@inproceedings{reuver-etal-2025-tell,
    title = "Tell Me What You Know About Sexism: Expert-{LLM} Interaction Strategies and Co-Created Definitions for Zero-Shot Sexism Detection",
    author = "Reuver, Myrthe and
      Sen, Indira and
      Melis, Matteo and
      Lapesa, Gabriella",
    editor = "Chiruzzo, Luis and
      Ritter, Alan and
      Wang, Lu",
    booktitle = "Findings of the Association for Computational Linguistics: NAACL 2025",
    month = apr,
    year = "2025",
    address = "Albuquerque, New Mexico",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2025.findings-naacl.470/",
    doi = "10.18653/v1/2025.findings-naacl.470",
    pages = "8438--8467",
    isbn = "979-8-89176-195-7",
    abstract = "This paper investigates hybrid intelligence and collaboration between researchers of sexism and Large Language Models (LLMs), with a four-component pipeline. First, nine sexism researchers answer questions about their knowledge of sexism and of LLMs. They then participate in two interactive experiments involving an LLM (GPT3.5). The first experiment has experts assessing the model{'}s knowledge about sexism and suitability for use in research. The second experiment tasks them with creating three different definitions of sexism: an expert-written definition, an LLM-written one, and a co-created definition. Lastly, zero-shot classification experiments use the three definitions from each expert in a prompt template for sexism detection, evaluating GPT4o on 2.500 texts sampled from five sexism benchmarks. We then analyze the resulting 67.500 classification decisions. The LLM interactions lead to longer and more complex definitions of sexism. Expert-written definitions on average perform poorly compared to LLM-generated definitions. However, some experts do improve classification performance with their co-created definitions of sexism, also experts who are inexperienced in using LLMs."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="reuver-etal-2025-tell">
<titleInfo>
<title>Tell Me What You Know About Sexism: Expert-LLM Interaction Strategies and Co-Created Definitions for Zero-Shot Sexism Detection</title>
</titleInfo>
<name type="personal">
<namePart type="given">Myrthe</namePart>
<namePart type="family">Reuver</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Indira</namePart>
<namePart type="family">Sen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matteo</namePart>
<namePart type="family">Melis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gabriella</namePart>
<namePart type="family">Lapesa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: NAACL 2025</title>
</titleInfo>
<name type="personal">
<namePart type="given">Luis</namePart>
<namePart type="family">Chiruzzo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alan</namePart>
<namePart type="family">Ritter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lu</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Albuquerque, New Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-195-7</identifier>
</relatedItem>
<abstract>This paper investigates hybrid intelligence and collaboration between researchers of sexism and Large Language Models (LLMs), with a four-component pipeline. First, nine sexism researchers answer questions about their knowledge of sexism and of LLMs. They then participate in two interactive experiments involving an LLM (GPT3.5). The first experiment has experts assessing the model’s knowledge about sexism and suitability for use in research. The second experiment tasks them with creating three different definitions of sexism: an expert-written definition, an LLM-written one, and a co-created definition. Lastly, zero-shot classification experiments use the three definitions from each expert in a prompt template for sexism detection, evaluating GPT4o on 2.500 texts sampled from five sexism benchmarks. We then analyze the resulting 67.500 classification decisions. The LLM interactions lead to longer and more complex definitions of sexism. Expert-written definitions on average perform poorly compared to LLM-generated definitions. However, some experts do improve classification performance with their co-created definitions of sexism, also experts who are inexperienced in using LLMs.</abstract>
<identifier type="citekey">reuver-etal-2025-tell</identifier>
<identifier type="doi">10.18653/v1/2025.findings-naacl.470</identifier>
<location>
<url>https://aclanthology.org/2025.findings-naacl.470/</url>
</location>
<part>
<date>2025-04</date>
<extent unit="page">
<start>8438</start>
<end>8467</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Tell Me What You Know About Sexism: Expert-LLM Interaction Strategies and Co-Created Definitions for Zero-Shot Sexism Detection
%A Reuver, Myrthe
%A Sen, Indira
%A Melis, Matteo
%A Lapesa, Gabriella
%Y Chiruzzo, Luis
%Y Ritter, Alan
%Y Wang, Lu
%S Findings of the Association for Computational Linguistics: NAACL 2025
%D 2025
%8 April
%I Association for Computational Linguistics
%C Albuquerque, New Mexico
%@ 979-8-89176-195-7
%F reuver-etal-2025-tell
%X This paper investigates hybrid intelligence and collaboration between researchers of sexism and Large Language Models (LLMs), with a four-component pipeline. First, nine sexism researchers answer questions about their knowledge of sexism and of LLMs. They then participate in two interactive experiments involving an LLM (GPT3.5). The first experiment has experts assessing the model’s knowledge about sexism and suitability for use in research. The second experiment tasks them with creating three different definitions of sexism: an expert-written definition, an LLM-written one, and a co-created definition. Lastly, zero-shot classification experiments use the three definitions from each expert in a prompt template for sexism detection, evaluating GPT4o on 2.500 texts sampled from five sexism benchmarks. We then analyze the resulting 67.500 classification decisions. The LLM interactions lead to longer and more complex definitions of sexism. Expert-written definitions on average perform poorly compared to LLM-generated definitions. However, some experts do improve classification performance with their co-created definitions of sexism, also experts who are inexperienced in using LLMs.
%R 10.18653/v1/2025.findings-naacl.470
%U https://aclanthology.org/2025.findings-naacl.470/
%U https://doi.org/10.18653/v1/2025.findings-naacl.470
%P 8438-8467
Markdown (Informal)
[Tell Me What You Know About Sexism: Expert-LLM Interaction Strategies and Co-Created Definitions for Zero-Shot Sexism Detection](https://aclanthology.org/2025.findings-naacl.470/) (Reuver et al., Findings 2025)
ACL