% BibTeX record: PreClinIE paper in the Proceedings of the 24th Workshop on Biomedical Language Processing (2025), pp. 74--87.
@inproceedings{doneva-etal-2025-preclinie,
    title = "{PreClinIE}: An Annotated Corpus for Information Extraction in Preclinical Studies",
    author = {Doneva, Simona and
      Hubarava, Hanna and
      H{\"a}rvelid, Pia and
      Z{\"u}rrer, Wolfgang and
      Bugajska, Julia and
      Hild, Bernard and
      Br{\"u}schweiler, David and
      Schneider, Gerold and
      Ellendorff, Tilia and
      Ineichen, Benjamin},
    editor = "Demner-Fushman, Dina and
      Ananiadou, Sophia and
      Miwa, Makoto and
      Tsujii, Junichi",
    booktitle = "Proceedings of the 24th Workshop on Biomedical Language Processing",
    month = aug,
    year = "2025",
    address = "Vienna, Austria",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2025.bionlp-1.8/",
    doi = "10.18653/v1/2025.bionlp-1.8",
    pages = "74--87",
    ISBN = "979-8-89176-275-6",
    abstract = "Animal research, sometimes referred to as preclinical research, plays a vital role in bridging the gap between basic science and clinical applications. However, the rapid increase in publications and the complexity of reported findings make it increasingly difficult for researchers to extract and assess relevant information. While automation through natural language processing (NLP) holds great potential for addressing this challenge, progress is hindered by the absence of high-quality, comprehensive annotated resources specific to preclinical studies. To fill this gap, we introduce PreClinIE, a fully open manually annotated dataset. The corpus consists of abstracts and methods sections from 725 publications, annotated for study rigor indicators (e.g., random allocation) and other study characteristics (e.g., species). We describe the data collection and annotation process, outlining the challenges of working with preclinical literature. By providing this resource, we aim to accelerate the development of NLP tools that enhance literature mining in preclinical research."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="doneva-etal-2025-preclinie">
<titleInfo>
<title>PreClinIE: An Annotated Corpus for Information Extraction in Preclinical Studies</title>
</titleInfo>
<name type="personal">
<namePart type="given">Simona</namePart>
<namePart type="family">Doneva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hanna</namePart>
<namePart type="family">Hubarava</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pia</namePart>
<namePart type="family">Härvelid</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wolfgang</namePart>
<namePart type="family">Zürrer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Julia</namePart>
<namePart type="family">Bugajska</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bernard</namePart>
<namePart type="family">Hild</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Brüschweiler</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gerold</namePart>
<namePart type="family">Schneider</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tilia</namePart>
<namePart type="family">Ellendorff</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Benjamin</namePart>
<namePart type="family">Ineichen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 24th Workshop on Biomedical Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dina</namePart>
<namePart type="family">Demner-Fushman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sophia</namePart>
<namePart type="family">Ananiadou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Makoto</namePart>
<namePart type="family">Miwa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junichi</namePart>
<namePart type="family">Tsujii</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-275-6</identifier>
</relatedItem>
<abstract>Animal research, sometimes referred to as preclinical research, plays a vital role in bridging the gap between basic science and clinical applications. However, the rapid increase in publications and the complexity of reported findings make it increasingly difficult for researchers to extract and assess relevant information. While automation through natural language processing (NLP) holds great potential for addressing this challenge, progress is hindered by the absence of high-quality, comprehensive annotated resources specific to preclinical studies. To fill this gap, we introduce PreClinIE, a fully open manually annotated dataset. The corpus consists of abstracts and methods sections from 725 publications, annotated for study rigor indicators (e.g., random allocation) and other study characteristics (e.g., species). We describe the data collection and annotation process, outlining the challenges of working with preclinical literature. By providing this resource, we aim to accelerate the development of NLP tools that enhance literature mining in preclinical research.</abstract>
<identifier type="citekey">doneva-etal-2025-preclinie</identifier>
<identifier type="doi">10.18653/v1/2025.bionlp-1.8</identifier>
<location>
<url>https://aclanthology.org/2025.bionlp-1.8/</url>
</location>
<part>
<date>2025-08</date>
<extent unit="page">
<start>74</start>
<end>87</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T PreClinIE: An Annotated Corpus for Information Extraction in Preclinical Studies
%A Doneva, Simona
%A Hubarava, Hanna
%A Härvelid, Pia
%A Zürrer, Wolfgang
%A Bugajska, Julia
%A Hild, Bernard
%A Brüschweiler, David
%A Schneider, Gerold
%A Ellendorff, Tilia
%A Ineichen, Benjamin
%Y Demner-Fushman, Dina
%Y Ananiadou, Sophia
%Y Miwa, Makoto
%Y Tsujii, Junichi
%S Proceedings of the 24th Workshop on Biomedical Language Processing
%D 2025
%8 August
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-275-6
%F doneva-etal-2025-preclinie
%X Animal research, sometimes referred to as preclinical research, plays a vital role in bridging the gap between basic science and clinical applications. However, the rapid increase in publications and the complexity of reported findings make it increasingly difficult for researchers to extract and assess relevant information. While automation through natural language processing (NLP) holds great potential for addressing this challenge, progress is hindered by the absence of high-quality, comprehensive annotated resources specific to preclinical studies. To fill this gap, we introduce PreClinIE, a fully open manually annotated dataset. The corpus consists of abstracts and methods sections from 725 publications, annotated for study rigor indicators (e.g., random allocation) and other study characteristics (e.g., species). We describe the data collection and annotation process, outlining the challenges of working with preclinical literature. By providing this resource, we aim to accelerate the development of NLP tools that enhance literature mining in preclinical research.
%R 10.18653/v1/2025.bionlp-1.8
%U https://aclanthology.org/2025.bionlp-1.8/
%U https://doi.org/10.18653/v1/2025.bionlp-1.8
%P 74-87
Markdown (Informal)
[PreClinIE: An Annotated Corpus for Information Extraction in Preclinical Studies](https://aclanthology.org/2025.bionlp-1.8/) (Doneva et al., BioNLP 2025)
ACL
- Simona Doneva, Hanna Hubarava, Pia Härvelid, Wolfgang Zürrer, Julia Bugajska, Bernard Hild, David Brüschweiler, Gerold Schneider, Tilia Ellendorff, and Benjamin Ineichen. 2025. PreClinIE: An Annotated Corpus for Information Extraction in Preclinical Studies. In Proceedings of the 24th Workshop on Biomedical Language Processing, pages 74–87, Vienna, Austria. Association for Computational Linguistics.