@inproceedings{martinelli-etal-2026-domain,
title = "A Domain-Specific Curated Benchmark for Entity and Document-Level Relation Extraction",
author = "Martinelli, Marco and
Marchesin, Stefano and
Bonato, Vanessa and
Di Nunzio, Giorgio and
Ferro, Nicola and
Irrera, Ornella and
Menotti, Laura and
Vezzani, Federica and
Silvello, Gianmaria",
editor = "Demberg, Vera and
Inui, Kentaro and
Marquez, Llu{\'i}s",
booktitle = "Findings of the {A}ssociation for {C}omputational {L}inguistics: {EACL} 2026",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.findings-eacl.301/",
pages = "5693--5711",
ISBN = "979-8-89176-386-9",
abstract = "Information Extraction (IE), encompassing Named Entity Recognition (NER), Named Entity Linking (NEL), and Relation Extraction (RE), is critical for transforming the rapidly growing volume of scientific publications into structured, actionable knowledge. This need is especially evident in fast-evolving biomedical fields such as the gut-brain axis, where research investigates complex interactions between the gut microbiota and brain-related disorders. Existing biomedical IE benchmarks, however, are often narrow in scope and rely heavily on distantly supervised or automatically generated annotations, limiting their utility for advancing robust IE methods. We introduce GutBrainIE, a benchmark based on more than 1,600 PubMed abstracts, manually annotated by biomedical and terminological experts with fine-grained entities, concept-level links, and relations. While grounded in the gut-brain axis, the benchmark{'}s rich schema, multiple tasks, and combination of highly curated and weakly supervised data make it broadly applicable to the development and evaluation of biomedical IE systems across domains."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="martinelli-etal-2026-domain">
<titleInfo>
<title>A Domain-Specific Curated Benchmark for Entity and Document-Level Relation Extraction</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marco</namePart>
<namePart type="family">Martinelli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stefano</namePart>
<namePart type="family">Marchesin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vanessa</namePart>
<namePart type="family">Bonato</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Giorgio</namePart>
<namePart type="family">Di Nunzio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nicola</namePart>
<namePart type="family">Ferro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ornella</namePart>
<namePart type="family">Irrera</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Laura</namePart>
<namePart type="family">Menotti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Federica</namePart>
<namePart type="family">Vezzani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gianmaria</namePart>
<namePart type="family">Silvello</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EACL 2026</title>
</titleInfo>
<name type="personal">
<namePart type="given">Vera</namePart>
<namePart type="family">Demberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kentaro</namePart>
<namePart type="family">Inui</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lluís</namePart>
<namePart type="family">Marquez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-386-9</identifier>
</relatedItem>
<abstract>Information Extraction (IE), encompassing Named Entity Recognition (NER), Named Entity Linking (NEL), and Relation Extraction (RE), is critical for transforming the rapidly growing volume of scientific publications into structured, actionable knowledge. This need is especially evident in fast-evolving biomedical fields such as the gut-brain axis, where research investigates complex interactions between the gut microbiota and brain-related disorders. Existing biomedical IE benchmarks, however, are often narrow in scope and rely heavily on distantly supervised or automatically generated annotations, limiting their utility for advancing robust IE methods. We introduce GutBrainIE, a benchmark based on more than 1,600 PubMed abstracts, manually annotated by biomedical and terminological experts with fine-grained entities, concept-level links, and relations. While grounded in the gut-brain axis, the benchmark’s rich schema, multiple tasks, and combination of highly curated and weakly supervised data make it broadly applicable to the development and evaluation of biomedical IE systems across domains.</abstract>
<identifier type="citekey">martinelli-etal-2026-domain</identifier>
<location>
<url>https://aclanthology.org/2026.findings-eacl.301/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>5693</start>
<end>5711</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Domain-Specific Curated Benchmark for Entity and Document-Level Relation Extraction
%A Martinelli, Marco
%A Marchesin, Stefano
%A Bonato, Vanessa
%A Di Nunzio, Giorgio
%A Ferro, Nicola
%A Irrera, Ornella
%A Menotti, Laura
%A Vezzani, Federica
%A Silvello, Gianmaria
%Y Demberg, Vera
%Y Inui, Kentaro
%Y Marquez, Lluís
%S Findings of the Association for Computational Linguistics: EACL 2026
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-386-9
%F martinelli-etal-2026-domain
%X Information Extraction (IE), encompassing Named Entity Recognition (NER), Named Entity Linking (NEL), and Relation Extraction (RE), is critical for transforming the rapidly growing volume of scientific publications into structured, actionable knowledge. This need is especially evident in fast-evolving biomedical fields such as the gut-brain axis, where research investigates complex interactions between the gut microbiota and brain-related disorders. Existing biomedical IE benchmarks, however, are often narrow in scope and rely heavily on distantly supervised or automatically generated annotations, limiting their utility for advancing robust IE methods. We introduce GutBrainIE, a benchmark based on more than 1,600 PubMed abstracts, manually annotated by biomedical and terminological experts with fine-grained entities, concept-level links, and relations. While grounded in the gut-brain axis, the benchmark’s rich schema, multiple tasks, and combination of highly curated and weakly supervised data make it broadly applicable to the development and evaluation of biomedical IE systems across domains.
%U https://aclanthology.org/2026.findings-eacl.301/
%P 5693-5711
Markdown (Informal)
[A Domain-Specific Curated Benchmark for Entity and Document-Level Relation Extraction](https://aclanthology.org/2026.findings-eacl.301/) (Martinelli et al., Findings 2026)
ACL
- Marco Martinelli, Stefano Marchesin, Vanessa Bonato, Giorgio Di Nunzio, Nicola Ferro, Ornella Irrera, Laura Menotti, Federica Vezzani, and Gianmaria Silvello. 2026. A Domain-Specific Curated Benchmark for Entity and Document-Level Relation Extraction. In Findings of the Association for Computational Linguistics: EACL 2026, pages 5693–5711, Rabat, Morocco. Association for Computational Linguistics.