@inproceedings{gautam-etal-2024-winopron,
title = "{W}ino{P}ron: Revisiting {E}nglish {W}inogender Schemas for Consistency, Coverage, and Grammatical Case",
author = "Gautam, Vagrant and
Steuer, Julius and
Bingert, Eileen and
Johns, Ray and
Lauscher, Anne and
Klakow, Dietrich",
editor = "Ogrodniczuk, Maciej and
Nedoluzhko, Anna and
Poesio, Massimo and
Pradhan, Sameer and
Ng, Vincent",
booktitle = "Proceedings of the Seventh Workshop on Computational Models of Reference, Anaphora and Coreference",
month = nov,
year = "2024",
address = "Miami",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.crac-1.6/",
doi = "10.18653/v1/2024.crac-1.6",
pages = "52--66",
abstract = "While measuring bias and robustness in coreference resolution are important goals, such measurements are only as good as the tools we use to measure them. Winogender Schemas (Rudinger et al., 2018) are an influential dataset proposed to evaluate gender bias in coreference resolution, but a closer look reveals issues with the data that compromise its use for reliable evaluation, including treating different pronominal forms as equivalent, violations of template constraints, and typographical errors. We identify these issues and fix them, contributing a new dataset: WinoPron. Using WinoPron, we evaluate two state-of-the-art supervised coreference resolution systems, SpanBERT, and five sizes of FLAN-T5, and demonstrate that accusative pronouns are harder to resolve for all models. We also propose a new method to evaluate pronominal bias in coreference resolution that goes beyond the binary. With this method, we also show that bias characteristics vary not just across pronoun sets (e.g., \textit{he} vs. \textit{she}), but also across surface forms of those sets (e.g., \textit{him} vs. \textit{his})."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="gautam-etal-2024-winopron">
<titleInfo>
<title>WinoPron: Revisiting English Winogender Schemas for Consistency, Coverage, and Grammatical Case</title>
</titleInfo>
<name type="personal">
<namePart type="given">Vagrant</namePart>
<namePart type="family">Gautam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Julius</namePart>
<namePart type="family">Steuer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eileen</namePart>
<namePart type="family">Bingert</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ray</namePart>
<namePart type="family">Johns</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anne</namePart>
<namePart type="family">Lauscher</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dietrich</namePart>
<namePart type="family">Klakow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Seventh Workshop on Computational Models of Reference, Anaphora and Coreference</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maciej</namePart>
<namePart type="family">Ogrodniczuk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Nedoluzhko</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Massimo</namePart>
<namePart type="family">Poesio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sameer</namePart>
<namePart type="family">Pradhan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vincent</namePart>
<namePart type="family">Ng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>While measuring bias and robustness in coreference resolution are important goals, such measurements are only as good as the tools we use to measure them. Winogender Schemas (Rudinger et al., 2018) are an influential dataset proposed to evaluate gender bias in coreference resolution, but a closer look reveals issues with the data that compromise its use for reliable evaluation, including treating different pronominal forms as equivalent, violations of template constraints, and typographical errors. We identify these issues and fix them, contributing a new dataset: WinoPron. Using WinoPron, we evaluate two state-of-the-art supervised coreference resolution systems, SpanBERT, and five sizes of FLAN-T5, and demonstrate that accusative pronouns are harder to resolve for all models. We also propose a new method to evaluate pronominal bias in coreference resolution that goes beyond the binary. With this method, we also show that bias characteristics vary not just across pronoun sets (e.g., he vs. she), but also across surface forms of those sets (e.g., him vs. his).</abstract>
<identifier type="citekey">gautam-etal-2024-winopron</identifier>
<identifier type="doi">10.18653/v1/2024.crac-1.6</identifier>
<location>
<url>https://aclanthology.org/2024.crac-1.6/</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>52</start>
<end>66</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T WinoPron: Revisiting English Winogender Schemas for Consistency, Coverage, and Grammatical Case
%A Gautam, Vagrant
%A Steuer, Julius
%A Bingert, Eileen
%A Johns, Ray
%A Lauscher, Anne
%A Klakow, Dietrich
%Y Ogrodniczuk, Maciej
%Y Nedoluzhko, Anna
%Y Poesio, Massimo
%Y Pradhan, Sameer
%Y Ng, Vincent
%S Proceedings of the Seventh Workshop on Computational Models of Reference, Anaphora and Coreference
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami
%F gautam-etal-2024-winopron
%X While measuring bias and robustness in coreference resolution are important goals, such measurements are only as good as the tools we use to measure them. Winogender Schemas (Rudinger et al., 2018) are an influential dataset proposed to evaluate gender bias in coreference resolution, but a closer look reveals issues with the data that compromise its use for reliable evaluation, including treating different pronominal forms as equivalent, violations of template constraints, and typographical errors. We identify these issues and fix them, contributing a new dataset: WinoPron. Using WinoPron, we evaluate two state-of-the-art supervised coreference resolution systems, SpanBERT, and five sizes of FLAN-T5, and demonstrate that accusative pronouns are harder to resolve for all models. We also propose a new method to evaluate pronominal bias in coreference resolution that goes beyond the binary. With this method, we also show that bias characteristics vary not just across pronoun sets (e.g., he vs. she), but also across surface forms of those sets (e.g., him vs. his).
%R 10.18653/v1/2024.crac-1.6
%U https://aclanthology.org/2024.crac-1.6/
%U https://doi.org/10.18653/v1/2024.crac-1.6
%P 52-66
Markdown (Informal)
[WinoPron: Revisiting English Winogender Schemas for Consistency, Coverage, and Grammatical Case](https://aclanthology.org/2024.crac-1.6/) (Gautam et al., CRAC 2024)
ACL
- Vagrant Gautam, Julius Steuer, Eileen Bingert, Ray Johns, Anne Lauscher, and Dietrich Klakow. 2024. WinoPron: Revisiting English Winogender Schemas for Consistency, Coverage, and Grammatical Case. In Proceedings of the Seventh Workshop on Computational Models of Reference, Anaphora and Coreference, pages 52–66, Miami. Association for Computational Linguistics.