@inproceedings{bolucu-etal-2026-using,
title = "Using a Human-{AI} Teaming Approach to Create and Curate Scientific Datasets with the {S}ci{L}ire System",
author = {B{\"o}l{\"u}c{\"u}, Necva and
Irons, Jessica and
Lee, Changhyun and
Jin, Brian and
Rybinski, Maciej and
Yang, Huichen and
Duenser, Andreas and
Wan, Stephen},
editor = "Croce, Danilo and
Leidner, Jochen and
Moosavi, Nafise Sadat",
booktitle = "Proceedings of the 19th Conference of the {E}uropean Chapter of the {A}ssociation for {C}omputational {L}inguistics (Volume 3: System Demonstrations)",
month = mar,
year = "2026",
address = "Rabat, Marocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.eacl-demo.30/",
pages = "428--444",
ISBN = "979-8-89176-382-1",
abstract = "The rapid growth of scientific literature has made manual extraction of structured knowledge increasingly impractical. To address this challenge, we introduce SCILIRE, a system for creating datasets from scientific literature. SCILIRE has been designed around Human-AI teaming principles centred on workflows for verifying and curating data. It facilitates an iterative workflow in which researchers can review and correct AI outputs. Furthermore, this interaction is used as a feedback signal to improve future LLM-based inference. We evaluate our design using a combination of intrinsic benchmarking outcomes together with real-world case studies across multiple domains. The results demonstrate that SCILIRE improves extraction fidelity and facilitates efficient dataset creation."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bolucu-etal-2026-using">
<titleInfo>
<title>Using a Human-AI Teaming Approach to Create and Curate Scientific Datasets with the SciLire System</title>
</titleInfo>
<name type="personal">
<namePart type="given">Necva</namePart>
<namePart type="family">Bölücü</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jessica</namePart>
<namePart type="family">Irons</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Changhyun</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Brian</namePart>
<namePart type="family">Jin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maciej</namePart>
<namePart type="family">Rybinski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Huichen</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andreas</namePart>
<namePart type="family">Duenser</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stephen</namePart>
<namePart type="family">Wan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 3: System Demonstrations)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Danilo</namePart>
<namePart type="family">Croce</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jochen</namePart>
<namePart type="family">Leidner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nafise</namePart>
<namePart type="given">Sadat</namePart>
<namePart type="family">Moosavi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Marocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-382-1</identifier>
</relatedItem>
<abstract>The rapid growth of scientific literature has made manual extraction of structured knowledge increasingly impractical. To address this challenge, we introduce SCILIRE, a system for creating datasets from scientific literature. SCILIRE has been designed around Human-AI teaming principles centred on workflows for verifying and curating data. It facilitates an iterative workflow in which researchers can review and correct AI outputs. Furthermore, this interaction is used as a feedback signal to improve future LLM-based inference. We evaluate our design using a combination of intrinsic benchmarking outcomes together with real-world case studies across multiple domains. The results demonstrate that SCILIRE improves extraction fidelity and facilitates efficient dataset creation.</abstract>
<identifier type="citekey">bolucu-etal-2026-using</identifier>
<location>
<url>https://aclanthology.org/2026.eacl-demo.30/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>428</start>
<end>444</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Using a Human-AI Teaming Approach to Create and Curate Scientific Datasets with the SciLire System
%A Bölücü, Necva
%A Irons, Jessica
%A Lee, Changhyun
%A Jin, Brian
%A Rybinski, Maciej
%A Yang, Huichen
%A Duenser, Andreas
%A Wan, Stephen
%Y Croce, Danilo
%Y Leidner, Jochen
%Y Moosavi, Nafise Sadat
%S Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 3: System Demonstrations)
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Marocco
%@ 979-8-89176-382-1
%F bolucu-etal-2026-using
%X The rapid growth of scientific literature has made manual extraction of structured knowledge increasingly impractical. To address this challenge, we introduce SCILIRE, a system for creating datasets from scientific literature. SCILIRE has been designed around Human-AI teaming principles centred on workflows for verifying and curating data. It facilitates an iterative workflow in which researchers can review and correct AI outputs. Furthermore, this interaction is used as a feedback signal to improve future LLM-based inference. We evaluate our design using a combination of intrinsic benchmarking outcomes together with real-world case studies across multiple domains. The results demonstrate that SCILIRE improves extraction fidelity and facilitates efficient dataset creation.
%U https://aclanthology.org/2026.eacl-demo.30/
%P 428-444
Markdown (Informal)
[Using a Human-AI Teaming Approach to Create and Curate Scientific Datasets with the SciLire System](https://aclanthology.org/2026.eacl-demo.30/) (Bölücü et al., EACL 2026)
ACL
- Necva Bölücü, Jessica Irons, Changhyun Lee, Brian Jin, Maciej Rybinski, Huichen Yang, Andreas Duenser, and Stephen Wan. 2026. Using a Human-AI Teaming Approach to Create and Curate Scientific Datasets with the SciLire System. In Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 3: System Demonstrations), pages 428–444, Rabat, Marocco. Association for Computational Linguistics.