@inproceedings{sun-etal-2025-docspiral,
title = "{D}oc{S}piral: A Platform for Integrated Assistive Document Annotation through Human-in-the-Spiral",
author = "Sun, Qiang and
Li, Sirui and
Bi, Tingting and
Huynh, Du Q. and
Reynolds, Mark and
Luo, Yuanyi and
Liu, Wei",
editor = "Mishra, Pushkar and
Muresan, Smaranda and
Yu, Tao",
booktitle = "Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 3: System Demonstrations)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.acl-demo.26/",
doi = "10.18653/v1/2025.acl-demo.26",
pages = "267--274",
ISBN = "979-8-89176-253-4",
abstract = "Acquiring structured data from domain-specific, image-based documents{---}such as scanned reports{---}is crucial for many downstream tasks but remains challenging due to document variability. Many of these documents exist as images rather than as machine-readable text, which requires human annotation to train automated extraction systems.We present \textbf{DocSpiral}, the first Human-in-the-Spiral assistive document annotation platform, designed to address the challenge of extracting structured information from domain-specific, image-based document collections.Our spiral design establishes an iterative cycle in which human annotations train models that progressively require less manual intervention. \textbf{DocSpiral} integrates document format normalization, comprehensive annotation interfaces, evaluation metrics dashboard, and API endpoints for the development of AI / ML models into a unified workflow.Experiments demonstrate that our framework reduces annotation time by at least 41{\%} while showing consistent performance gains across three iterations during model training.By making this annotation platform freely accessible, we aim to lower barriers to AI/ML models development in document processing, facilitating the adoption of large language models in image-based, document-intensive fields such as geoscience and healthcare. The system is freely available at: \url{https://app.ai4wa.com}. The demonstration video is available: \url{https://app.ai4wa.com/docs/docspiral/demo}."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sun-etal-2025-docspiral">
<titleInfo>
<title>DocSpiral: A Platform for Integrated Assistive Document Annotation through Human-in-the-Spiral</title>
</titleInfo>
<name type="personal">
<namePart type="given">Qiang</namePart>
<namePart type="family">Sun</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sirui</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tingting</namePart>
<namePart type="family">Bi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Du</namePart>
<namePart type="given">Q</namePart>
<namePart type="family">Huynh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mark</namePart>
<namePart type="family">Reynolds</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuanyi</namePart>
<namePart type="family">Luo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wei</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 3: System Demonstrations)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Pushkar</namePart>
<namePart type="family">Mishra</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Smaranda</namePart>
<namePart type="family">Muresan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tao</namePart>
<namePart type="family">Yu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-253-4</identifier>
</relatedItem>
<abstract>Acquiring structured data from domain-specific, image-based documents—such as scanned reports—is crucial for many downstream tasks but remains challenging due to document variability. Many of these documents exist as images rather than as machine-readable text, which requires human annotation to train automated extraction systems.We present DocSpiral, the first Human-in-the-Spiral assistive document annotation platform, designed to address the challenge of extracting structured information from domain-specific, image-based document collections.Our spiral design establishes an iterative cycle in which human annotations train models that progressively require less manual intervention. DocSpiral integrates document format normalization, comprehensive annotation interfaces, evaluation metrics dashboard, and API endpoints for the development of AI / ML models into a unified workflow.Experiments demonstrate that our framework reduces annotation time by at least 41% while showing consistent performance gains across three iterations during model training.By making this annotation platform freely accessible, we aim to lower barriers to AI/ML models development in document processing, facilitating the adoption of large language models in image-based, document-intensive fields such as geoscience and healthcare. The system is freely available at: https://app.ai4wa.com. The demonstration video is available: https://app.ai4wa.com/docs/docspiral/demo.</abstract>
<identifier type="citekey">sun-etal-2025-docspiral</identifier>
<identifier type="doi">10.18653/v1/2025.acl-demo.26</identifier>
<location>
<url>https://aclanthology.org/2025.acl-demo.26/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>267</start>
<end>274</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T DocSpiral: A Platform for Integrated Assistive Document Annotation through Human-in-the-Spiral
%A Sun, Qiang
%A Li, Sirui
%A Bi, Tingting
%A Huynh, Du Q.
%A Reynolds, Mark
%A Luo, Yuanyi
%A Liu, Wei
%Y Mishra, Pushkar
%Y Muresan, Smaranda
%Y Yu, Tao
%S Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 3: System Demonstrations)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-253-4
%F sun-etal-2025-docspiral
%X Acquiring structured data from domain-specific, image-based documents—such as scanned reports—is crucial for many downstream tasks but remains challenging due to document variability. Many of these documents exist as images rather than as machine-readable text, which requires human annotation to train automated extraction systems.We present DocSpiral, the first Human-in-the-Spiral assistive document annotation platform, designed to address the challenge of extracting structured information from domain-specific, image-based document collections.Our spiral design establishes an iterative cycle in which human annotations train models that progressively require less manual intervention. DocSpiral integrates document format normalization, comprehensive annotation interfaces, evaluation metrics dashboard, and API endpoints for the development of AI / ML models into a unified workflow.Experiments demonstrate that our framework reduces annotation time by at least 41% while showing consistent performance gains across three iterations during model training.By making this annotation platform freely accessible, we aim to lower barriers to AI/ML models development in document processing, facilitating the adoption of large language models in image-based, document-intensive fields such as geoscience and healthcare. The system is freely available at: https://app.ai4wa.com. The demonstration video is available: https://app.ai4wa.com/docs/docspiral/demo.
%R 10.18653/v1/2025.acl-demo.26
%U https://aclanthology.org/2025.acl-demo.26/
%U https://doi.org/10.18653/v1/2025.acl-demo.26
%P 267-274
Markdown (Informal)
[DocSpiral: A Platform for Integrated Assistive Document Annotation through Human-in-the-Spiral](https://aclanthology.org/2025.acl-demo.26/) (Sun et al., ACL 2025)
ACL