@inproceedings{heng-etal-2026-re2,
title = "Re2-{D}oc{RED}: Revisiting Revisited-{D}oc{RED} for Joint Entity and Relation Extraction",
author = "Heng, Chen Kim and
Tong, Shao Wen and
Sheng, Julian Wong Wei",
editor = "Demberg, Vera and
Inui, Kentaro and
Marquez, Llu{\'i}s",
booktitle = "Proceedings of the 19th Conference of the {E}uropean Chapter of the {A}ssociation for {C}omputational {L}inguistics (Volume 1: Long Papers)",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.eacl-long.213/",
pages = "4585--4621",
ISBN = "979-8-89176-380-7",
abstract = "Document-level Joint Entity and Relation Extraction (JERE) benchmarks such as DocRED, Re-DocRED, and DocGNRE suffer from pervasive False Negatives (FN), undermining training and evaluation. In this paper, we introduce SiftingLogic {--} a training-free annotation pipeline that leverages LLMs with user-specifiable reasoning, enriched inverse/co-occurring relation schemas, and novel entity-level constraints to systematically address FN gaps. Applying SiftingLogic and our enriched schema of inverse and co-occurring relations, we add 29,580 verified triplets to Re-DocRED (train/dev, +27{\%}) and over 9,700 verified triplets to DocGNRE test (+49.89{\%}), yielding the enhanced Re$^2$-DocRED dataset. Beyond English datasets, we also apply our SiftingLogic to REDFM Mandarin test set, resulting in a significant increase in triplets from 663 to 1,391 (+109.8{\%}) demonstrating our pipeline{'}s generalisability across languages and datasets. Our experiments show that recall scores of models trained on existing public datasets drop notably on our revised splits, whereas our enriched training set mitigates this, underscoring persistent FN gaps and motivating our proposed SiftingLogic and Re$^2$-DocRED. To facilitate further research and reproducibility of our work, the Re$^2$-DocRED dataset is released at \url{https://github.com/klassessg/re2-docred}."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="heng-etal-2026-re2">
<titleInfo>
<title>Re2-DocRED: Revisiting Revisited-DocRED for Joint Entity and Relation Extraction</title>
</titleInfo>
<name type="personal">
<namePart type="given">Chen</namePart>
<namePart type="given">Kim</namePart>
<namePart type="family">Heng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shao</namePart>
<namePart type="given">Wen</namePart>
<namePart type="family">Tong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Julian</namePart>
<namePart type="given">Wong</namePart>
<namePart type="given">Wei</namePart>
<namePart type="family">Sheng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Vera</namePart>
<namePart type="family">Demberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kentaro</namePart>
<namePart type="family">Inui</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lluís</namePart>
<namePart type="family">Marquez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-380-7</identifier>
</relatedItem>
<abstract>Document-level Joint Entity and Relation Extraction (JERE) benchmarks such as DocRED, Re-DocRED, and DocGNRE suffer from pervasive False Negatives (FN), undermining training and evaluation. In this paper, we introduce SiftingLogic – a training-free annotation pipeline that leverages LLMs with user-specifiable reasoning, enriched inverse/co-occurring relation schemas, and novel entity-level constraints to systematically address FN gaps. Applying SiftingLogic and our enriched schema of inverse and co-occurring relations, we add 29,580 verified triplets to Re-DocRED (train/dev, +27%) and over 9,700 verified triplets to DocGNRE test (+49.89%), yielding the enhanced Re²-DocRED dataset. Beyond English datasets, we also apply our SiftingLogic to REDFM Mandarin test set, resulting in a significant increase in triplets from 663 to 1,391 (+109.8%) demonstrating our pipeline’s generalisability across languages and datasets. Our experiments show that recall scores of models trained on existing public datasets drop notably on our revised splits, whereas our enriched training set mitigates this, underscoring persistent FN gaps and motivating our proposed SiftingLogic and Re²-DocRED. To facilitate further research and reproducibility of our work, the Re²-DocRED dataset is released at https://github.com/klassessg/re2-docred.</abstract>
<identifier type="citekey">heng-etal-2026-re2</identifier>
<location>
<url>https://aclanthology.org/2026.eacl-long.213/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>4585</start>
<end>4621</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Re2-DocRED: Revisiting Revisited-DocRED for Joint Entity and Relation Extraction
%A Heng, Chen Kim
%A Tong, Shao Wen
%A Sheng, Julian Wong Wei
%Y Demberg, Vera
%Y Inui, Kentaro
%Y Marquez, Lluís
%S Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-380-7
%F heng-etal-2026-re2
%X Document-level Joint Entity and Relation Extraction (JERE) benchmarks such as DocRED, Re-DocRED, and DocGNRE suffer from pervasive False Negatives (FN), undermining training and evaluation. In this paper, we introduce SiftingLogic – a training-free annotation pipeline that leverages LLMs with user-specifiable reasoning, enriched inverse/co-occurring relation schemas, and novel entity-level constraints to systematically address FN gaps. Applying SiftingLogic and our enriched schema of inverse and co-occurring relations, we add 29,580 verified triplets to Re-DocRED (train/dev, +27%) and over 9,700 verified triplets to DocGNRE test (+49.89%), yielding the enhanced Re²-DocRED dataset. Beyond English datasets, we also apply our SiftingLogic to REDFM Mandarin test set, resulting in a significant increase in triplets from 663 to 1,391 (+109.8%) demonstrating our pipeline’s generalisability across languages and datasets. Our experiments show that recall scores of models trained on existing public datasets drop notably on our revised splits, whereas our enriched training set mitigates this, underscoring persistent FN gaps and motivating our proposed SiftingLogic and Re²-DocRED. To facilitate further research and reproducibility of our work, the Re²-DocRED dataset is released at https://github.com/klassessg/re2-docred.
%U https://aclanthology.org/2026.eacl-long.213/
%P 4585-4621
Markdown (Informal)
[Re2-DocRED: Revisiting Revisited-DocRED for Joint Entity and Relation Extraction](https://aclanthology.org/2026.eacl-long.213/) (Heng et al., EACL 2026)
ACL