@inproceedings{maury-etal-2026-aabaac,
title = "{AA}b{AAC}: An Annotated Corpus for Autoimmunity Information Extraction",
author = "Maury, Fabien and
Grosdidier, Sol{\`e}ne and
De Dieuleveult, Maud and
Coulet, Adrien",
editor = "Demner-Fushman, Dina and
Ananiadou, Sophia and
Roberts, Kirk and
Tsujii, Junichi",
booktitle = "{B}io{NLP} 2026",
month = jul,
year = "2026",
address = "San Diego, California",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.bionlp-1.65/",
pages = "791--800",
ISBN = "979-8-89176-434-7",
abstract = "Despite advances in information extraction driven by deep learning and large language models, performance gaps remain in highly specialized biomedical fields, where domain-specific complexity poses challenges for generalist models. In this work, we focus on the domain of autoimmunity where the main entities of interest are autoimmune diseases, autoantibodies (i.e. molecules that may mark or cause these diseases), their molecular targets, their location in the body, and the associated clinical signs. Herein, we present AAbAAC (AutoAntibodies and Autoimmunity Annotated Corpus), a corpus of 115 abstracts selected from PubMed that we manually annotated for those entities and their relationships. First, AAbAAC was used to evaluate several methods on the task of named entity recognition (NER), and second, to fine-tune NER models. Our study demonstrates the utility of AAbAAC for information extraction in the domain of autoimmunity, showing expected improvement in NER performance after fine-tuning. This illustrates the value of small-scale annotation efforts for specialized domains and contributes to the computational study of autoimmunity. The AAbAAC corpus is available at: https://github.com/f-maury/AAbAAC ."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="maury-etal-2026-aabaac">
<titleInfo>
<title>AAbAAC: An Annotated Corpus for Autoimmunity Information Extraction</title>
</titleInfo>
<name type="personal">
<namePart type="given">Fabien</namePart>
<namePart type="family">Maury</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Solène</namePart>
<namePart type="family">Grosdidier</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maud</namePart>
<namePart type="family">De Dieuleveult</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Adrien</namePart>
<namePart type="family">Coulet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>BioNLP 2026</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dina</namePart>
<namePart type="family">Demner-Fushman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sophia</namePart>
<namePart type="family">Ananiadou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kirk</namePart>
<namePart type="family">Roberts</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junichi</namePart>
<namePart type="family">Tsujii</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-434-7</identifier>
</relatedItem>
<abstract>Despite advances in information extraction driven by deep learning and large language models, performance gaps remain in highly specialized biomedical fields, where domain-specific complexity poses challenges for generalist models. In this work, we focus on the domain of autoimmunity where the main entities of interest are autoimmune diseases, autoantibodies (i.e. molecules that may mark or cause these diseases), their molecular targets, their location in the body, and the associated clinical signs. Herein, we present AAbAAC (AutoAntibodies and Autoimmunity Annotated Corpus), a corpus of 115 abstracts selected from PubMed that we manually annotated for those entities and their relationships. First, AAbAAC was used to evaluate several methods on the task of named entity recognition (NER), and second, to fine-tune NER models. Our study demonstrates the utility of AAbAAC for information extraction in the domain of autoimmunity, showing expected improvement in NER performance after fine-tuning. This illustrates the value of small-scale annotation efforts for specialized domains and contributes to the computational study of autoimmunity. The AAbAAC corpus is available at: https://github.com/f-maury/AAbAAC .</abstract>
<identifier type="citekey">maury-etal-2026-aabaac</identifier>
<location>
<url>https://aclanthology.org/2026.bionlp-1.65/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>791</start>
<end>800</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T AAbAAC: An Annotated Corpus for Autoimmunity Information Extraction
%A Maury, Fabien
%A Grosdidier, Solène
%A De Dieuleveult, Maud
%A Coulet, Adrien
%Y Demner-Fushman, Dina
%Y Ananiadou, Sophia
%Y Roberts, Kirk
%Y Tsujii, Junichi
%S BioNLP 2026
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California
%@ 979-8-89176-434-7
%F maury-etal-2026-aabaac
%X Despite advances in information extraction driven by deep learning and large language models, performance gaps remain in highly specialized biomedical fields, where domain-specific complexity poses challenges for generalist models. In this work, we focus on the domain of autoimmunity where the main entities of interest are autoimmune diseases, autoantibodies (i.e. molecules that may mark or cause these diseases), their molecular targets, their location in the body, and the associated clinical signs. Herein, we present AAbAAC (AutoAntibodies and Autoimmunity Annotated Corpus), a corpus of 115 abstracts selected from PubMed that we manually annotated for those entities and their relationships. First, AAbAAC was used to evaluate several methods on the task of named entity recognition (NER), and second, to fine-tune NER models. Our study demonstrates the utility of AAbAAC for information extraction in the domain of autoimmunity, showing expected improvement in NER performance after fine-tuning. This illustrates the value of small-scale annotation efforts for specialized domains and contributes to the computational study of autoimmunity. The AAbAAC corpus is available at: https://github.com/f-maury/AAbAAC .
%U https://aclanthology.org/2026.bionlp-1.65/
%P 791-800
Markdown (Informal)
[AAbAAC: An Annotated Corpus for Autoimmunity Information Extraction](https://aclanthology.org/2026.bionlp-1.65/) (Maury et al., BioNLP 2026)
ACL