@inproceedings{bourgois-poibeau-2025-elephant,
title = "The Elephant in the Coreference Room: Resolving Coreference in Full-Length {F}rench Fiction Works",
author = "Bourgois, Antoine and
Poibeau, Thierry",
editor = "Ogrodniczuk, Maciej and
Novak, Michal and
Poesio, Massimo and
Pradhan, Sameer and
Ng, Vincent",
booktitle = "Proceedings of the Eighth Workshop on Computational Models of Reference, Anaphora and Coreference",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.crac-1.5/",
pages = "55--69",
abstract = "While coreference resolution is attracting more interest than ever from computational literature researchers, representative datasets of fully annotated long documents remain surprisingly scarce. In this paper, we introduce a new annotated corpus of three full-length French novels, totaling over 285,000 tokens. Unlike previous datasets focused on shorter texts, our corpus addresses the challenges posed by long, complex literary works, enabling evaluation of coreference models in the context of long reference chains. We present a modular coreference resolution pipeline that allows for fine-grained error analysis. We show that our approach is competitive and scales effectively to long documents. Finally, we demonstrate its usefulness to infer the gender of fictional characters, showcasing its relevance for both literary analysis and downstream NLP tasks."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bourgois-poibeau-2025-elephant">
<titleInfo>
<title>The Elephant in the Coreference Room: Resolving Coreference in Full-Length French Fiction Works</title>
</titleInfo>
<name type="personal">
<namePart type="given">Antoine</namePart>
<namePart type="family">Bourgois</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Poibeau</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Eighth Workshop on Computational Models of Reference, Anaphora and Coreference</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maciej</namePart>
<namePart type="family">Ogrodniczuk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michal</namePart>
<namePart type="family">Novak</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Massimo</namePart>
<namePart type="family">Poesio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sameer</namePart>
<namePart type="family">Pradhan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vincent</namePart>
<namePart type="family">Ng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>While coreference resolution is attracting more interest than ever from computational literature researchers, representative datasets of fully annotated long documents remain surprisingly scarce. In this paper, we introduce a new annotated corpus of three full-length French novels, totaling over 285,000 tokens. Unlike previous datasets focused on shorter texts, our corpus addresses the challenges posed by long, complex literary works, enabling evaluation of coreference models in the context of long reference chains. We present a modular coreference resolution pipeline that allows for fine-grained error analysis. We show that our approach is competitive and scales effectively to long documents. Finally, we demonstrate its usefulness to infer the gender of fictional characters, showcasing its relevance for both literary analysis and downstream NLP tasks.</abstract>
<identifier type="citekey">bourgois-poibeau-2025-elephant</identifier>
<location>
<url>https://aclanthology.org/2025.crac-1.5/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>55</start>
<end>69</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T The Elephant in the Coreference Room: Resolving Coreference in Full-Length French Fiction Works
%A Bourgois, Antoine
%A Poibeau, Thierry
%Y Ogrodniczuk, Maciej
%Y Novak, Michal
%Y Poesio, Massimo
%Y Pradhan, Sameer
%Y Ng, Vincent
%S Proceedings of the Eighth Workshop on Computational Models of Reference, Anaphora and Coreference
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%F bourgois-poibeau-2025-elephant
%X While coreference resolution is attracting more interest than ever from computational literature researchers, representative datasets of fully annotated long documents remain surprisingly scarce. In this paper, we introduce a new annotated corpus of three full-length French novels, totaling over 285,000 tokens. Unlike previous datasets focused on shorter texts, our corpus addresses the challenges posed by long, complex literary works, enabling evaluation of coreference models in the context of long reference chains. We present a modular coreference resolution pipeline that allows for fine-grained error analysis. We show that our approach is competitive and scales effectively to long documents. Finally, we demonstrate its usefulness to infer the gender of fictional characters, showcasing its relevance for both literary analysis and downstream NLP tasks.
%U https://aclanthology.org/2025.crac-1.5/
%P 55-69
Markdown (Informal)
[The Elephant in the Coreference Room: Resolving Coreference in Full-Length French Fiction Works](https://aclanthology.org/2025.crac-1.5/) (Bourgois & Poibeau, CRAC 2025)
ACL