% BibTeX record for the Quest2DataAgent paper in the EMNLP 2025
% System Demonstrations proceedings (pages 500--514).
% Conventions used below:
%   - every value is brace-delimited, so embedded quotes and accents stay safe;
%   - the camel-case system name is protected as one whole braced word so
%     sentence-casing styles keep its capitals without mid-word braces;
%   - "address" carries the location string exactly as exported.
@inproceedings{yang-etal-2025-quest2dataagent,
  title     = {{Quest2DataAgent}: Automating End-to-End Scientific Data Collection},
  author    = {Yang, Tianyu and
               Liu, Yuhan and
               Alosious, Sobin and
               Brown, Ethan A. and
               Rohr, Jason R. and
               Luo, Tengfei and
               Zhang, Xiangliang},
  editor    = {Habernal, Ivan and
               Schulam, Peter and
               Tiedemann, J{\"o}rg},
  booktitle = {Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: System Demonstrations},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.emnlp-demos.36/},
  pages     = {500--514},
  isbn      = {979-8-89176-334-0},
  abstract  = {Scientific research often requires constructing high-quality datasets, yet the current workflows remain labor-intensive, and dependent on domain expertise. Existing approaches automate isolated steps such as retrieval or generation, but lack support for the full end-to-end data collection process. We present Quest2DataAgent, a general-purpose multi-agent framework for automating scientific data collection workflows. Given a natural language research question, it decomposes tasks into structured subtasks, retrieves relevant data using hybrid strategies, evaluates dataset quality, and generates visualizations through a conversational interface. We demonstrate its flexibility in two domains: EcoData for ecological research and PolyData for polymer materials. Both systems share the same core architecture but operate over distinct datasets and user needs. Human evaluations show that Quest2DataAgent significantly improves data relevance, usability, and time efficiency compared to manual collection and tool-assisted baselines. The framework is open-source and extensible to other domains.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record; its ID matches the citekey identifier further down. -->
  <mods ID="yang-etal-2025-quest2dataagent">

    <!-- Title of the paper -->
    <titleInfo>
      <title>Quest2DataAgent: Automating End-to-End Scientific Data Collection</title>
    </titleInfo>

    <!-- Personal names carrying the "author" role -->
    <name type="personal">
      <namePart type="given">Tianyu</namePart>
      <namePart type="family">Yang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yuhan</namePart>
      <namePart type="family">Liu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sobin</namePart>
      <namePart type="family">Alosious</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ethan</namePart>
      <namePart type="given">A</namePart>
      <namePart type="family">Brown</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jason</namePart>
      <namePart type="given">R</namePart>
      <namePart type="family">Rohr</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Tengfei</namePart>
      <namePart type="family">Luo</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Xiangliang</namePart>
      <namePart type="family">Zhang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>

    <!-- Issue date of the paper (year-month) -->
    <originInfo>
      <dateIssued>2025-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume, with its editors, publisher, place and ISBN -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: System Demonstrations</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Ivan</namePart>
        <namePart type="family">Habernal</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Peter</namePart>
        <namePart type="family">Schulam</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jörg</namePart>
        <namePart type="family">Tiedemann</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Suzhou, China</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-334-0</identifier>
    </relatedItem>

    <!-- Abstract text is kept on one line so no whitespace is introduced into its content -->
    <abstract>Scientific research often requires constructing high-quality datasets, yet the current workflows remain labor-intensive, and dependent on domain expertise. Existing approaches automate isolated steps such as retrieval or generation, but lack support for the full end-to-end data collection process. We present Quest2DataAgent, a general-purpose multi-agent framework for automating scientific data collection workflows. Given a natural language research question, it decomposes tasks into structured subtasks, retrieves relevant data using hybrid strategies, evaluates dataset quality, and generates visualizations through a conversational interface. We demonstrate its flexibility in two domains: EcoData for ecological research and PolyData for polymer materials. Both systems share the same core architecture but operate over distinct datasets and user needs. Human evaluations show that Quest2DataAgent significantly improves data relevance, usability, and time efficiency compared to manual collection and tool-assisted baselines. The framework is open-source and extensible to other domains.</abstract>

    <!-- Citation key and landing-page URL -->
    <identifier type="citekey">yang-etal-2025-quest2dataagent</identifier>
    <location>
      <url>https://aclanthology.org/2025.emnlp-demos.36/</url>
    </location>

    <!-- Part details: date and page extent (start/end pages) -->
    <part>
      <date>2025-11</date>
      <extent unit="page">
        <start>500</start>
        <end>514</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Quest2DataAgent: Automating End-to-End Scientific Data Collection
%A Yang, Tianyu
%A Liu, Yuhan
%A Alosious, Sobin
%A Brown, Ethan A.
%A Rohr, Jason R.
%A Luo, Tengfei
%A Zhang, Xiangliang
%Y Habernal, Ivan
%Y Schulam, Peter
%Y Tiedemann, Jörg
%S Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: System Demonstrations
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-334-0
%F yang-etal-2025-quest2dataagent
%X Scientific research often requires constructing high-quality datasets, yet the current workflows remain labor-intensive, and dependent on domain expertise. Existing approaches automate isolated steps such as retrieval or generation, but lack support for the full end-to-end data collection process. We present Quest2DataAgent, a general-purpose multi-agent framework for automating scientific data collection workflows. Given a natural language research question, it decomposes tasks into structured subtasks, retrieves relevant data using hybrid strategies, evaluates dataset quality, and generates visualizations through a conversational interface. We demonstrate its flexibility in two domains: EcoData for ecological research and PolyData for polymer materials. Both systems share the same core architecture but operate over distinct datasets and user needs. Human evaluations show that Quest2DataAgent significantly improves data relevance, usability, and time efficiency compared to manual collection and tool-assisted baselines. The framework is open-source and extensible to other domains.
%U https://aclanthology.org/2025.emnlp-demos.36/
%P 500-514
Markdown (Informal)
[Quest2DataAgent: Automating End-to-End Scientific Data Collection](https://aclanthology.org/2025.emnlp-demos.36/) (Yang et al., EMNLP 2025)
ACL
- Tianyu Yang, Yuhan Liu, Sobin Alosious, Ethan A. Brown, Jason R. Rohr, Tengfei Luo, and Xiangliang Zhang. 2025. Quest2DataAgent: Automating End-to-End Scientific Data Collection. In Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: System Demonstrations, pages 500–514, Suzhou, China. Association for Computational Linguistics.