@inproceedings{zhang-etal-2025-scitat,
title = "{SCITAT}: A Question Answering Benchmark for Scientific Tables and Text Covering Diverse Reasoning Types",
author = "Zhang, Xuanliang and
Wang, Dingzirui and
Wang, Baoxin and
Dou, Longxu and
Lu, Xinyuan and
Xu, Keyan and
Wu, Dayong and
Zhu, Qingfu",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2025",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-acl.199/",
doi = "10.18653/v1/2025.findings-acl.199",
pages = "3859--3881",
ISBN = "979-8-89176-256-5",
abstract = "Scientific question answering (SQA) is an important task aimed at answering questions based on papers. However, current SQA datasets have limited reasoning types and neglect the relevance between tables and text, creating a significant gap with real scenarios. To address these challenges, we propose a QA benchmark for scientific tables and text with diverse reasoning types (SCITAT). To cover more reasoning types, we summarize various reasoning types from real-world questions. To reason on both tables and text, we require the questions to incorporate tables and text as much as possible. Based on SCITAT, we propose a baseline (CAR), which combines various reasoning methods to address different reasoning types and process tables and text at the same time. CAR brings average improvements of 4.1{\%} over other baselines on SCITAT, validating its effectiveness. Error analysis reveals the challenges of SCITAT, such as complex numerical calculations and domain knowledge."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="zhang-etal-2025-scitat">
    <titleInfo>
      <title>SCITAT: A Question Answering Benchmark for Scientific Tables and Text Covering Diverse Reasoning Types</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Xuanliang</namePart>
      <namePart type="family">Zhang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Dingzirui</namePart>
      <namePart type="family">Wang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Baoxin</namePart>
      <namePart type="family">Wang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Longxu</namePart>
      <namePart type="family">Dou</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Xinyuan</namePart>
      <namePart type="family">Lu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Keyan</namePart>
      <namePart type="family">Xu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Dayong</namePart>
      <namePart type="family">Wu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Qingfu</namePart>
      <namePart type="family">Zhu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: ACL 2025</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Wanxiang</namePart>
        <namePart type="family">Che</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Joyce</namePart>
        <namePart type="family">Nabende</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ekaterina</namePart>
        <namePart type="family">Shutova</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mohammad</namePart>
        <namePart type="given">Taher</namePart>
        <namePart type="family">Pilehvar</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Vienna, Austria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-256-5</identifier>
    </relatedItem>
    <abstract>Scientific question answering (SQA) is an important task aimed at answering questions based on papers. However, current SQA datasets have limited reasoning types and neglect the relevance between tables and text, creating a significant gap with real scenarios. To address these challenges, we propose a QA benchmark for scientific tables and text with diverse reasoning types (SCITAT). To cover more reasoning types, we summarize various reasoning types from real-world questions. To reason on both tables and text, we require the questions to incorporate tables and text as much as possible. Based on SCITAT, we propose a baseline (CAR), which combines various reasoning methods to address different reasoning types and process tables and text at the same time. CAR brings average improvements of 4.1% over other baselines on SCITAT, validating its effectiveness. Error analysis reveals the challenges of SCITAT, such as complex numerical calculations and domain knowledge.</abstract>
    <identifier type="citekey">zhang-etal-2025-scitat</identifier>
    <identifier type="doi">10.18653/v1/2025.findings-acl.199</identifier>
    <location>
      <url>https://aclanthology.org/2025.findings-acl.199/</url>
    </location>
    <part>
      <date>2025-07</date>
      <extent unit="page">
        <start>3859</start>
        <end>3881</end>
      </extent>
    </part>
  </mods>
</modsCollection>

%0 Conference Proceedings
%T SCITAT: A Question Answering Benchmark for Scientific Tables and Text Covering Diverse Reasoning Types
%A Zhang, Xuanliang
%A Wang, Dingzirui
%A Wang, Baoxin
%A Dou, Longxu
%A Lu, Xinyuan
%A Xu, Keyan
%A Wu, Dayong
%A Zhu, Qingfu
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Findings of the Association for Computational Linguistics: ACL 2025
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-256-5
%F zhang-etal-2025-scitat
%X Scientific question answering (SQA) is an important task aimed at answering questions based on papers. However, current SQA datasets have limited reasoning types and neglect the relevance between tables and text, creating a significant gap with real scenarios. To address these challenges, we propose a QA benchmark for scientific tables and text with diverse reasoning types (SCITAT). To cover more reasoning types, we summarize various reasoning types from real-world questions. To reason on both tables and text, we require the questions to incorporate tables and text as much as possible. Based on SCITAT, we propose a baseline (CAR), which combines various reasoning methods to address different reasoning types and process tables and text at the same time. CAR brings average improvements of 4.1% over other baselines on SCITAT, validating its effectiveness. Error analysis reveals the challenges of SCITAT, such as complex numerical calculations and domain knowledge.
%R 10.18653/v1/2025.findings-acl.199
%U https://aclanthology.org/2025.findings-acl.199/
%U https://doi.org/10.18653/v1/2025.findings-acl.199
%P 3859-3881

Markdown (Informal)
[SCITAT: A Question Answering Benchmark for Scientific Tables and Text Covering Diverse Reasoning Types](https://aclanthology.org/2025.findings-acl.199/) (Zhang et al., Findings 2025)

ACL
Xuanliang Zhang, Dingzirui Wang, Baoxin Wang, Longxu Dou, Xinyuan Lu, Keyan Xu, Dayong Wu, and Qingfu Zhu. 2025. SCITAT: A Question Answering Benchmark for Scientific Tables and Text Covering Diverse Reasoning Types. In Findings of the Association for Computational Linguistics: ACL 2025, pages 3859–3881, Vienna, Austria. Association for Computational Linguistics.