@inproceedings{schumacher-etal-2025-raster,
title = "{RAST}e{R}: Robust, Agentic, and Structured Temporal Reasoning",
author = "Schumacher, Dan and
Haji, Fatemeh and
Grey, Tara and
Bandlamudi, Niharika and
Karnik, Nupoor and
Kumar, Gagana Uday and
Chiang, Cho-Yu Jason and
Najafirad, Peyman and
Vishwamitra, Nishant and
Rios, Anthony",
editor = "Inui, Kentaro and
Sakti, Sakriani and
Wang, Haofen and
Wong, Derek F. and
Bhattacharyya, Pushpak and
Banerjee, Biplab and
Ekbal, Asif and
Chakraborty, Tanmoy and
Singh, Dhirendra Pratap",
booktitle = "Proceedings of the 14th International Joint Conference on Natural Language Processing and the 4th Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics",
month = dec,
year = "2025",
address = "Mumbai, India",
publisher = "The Asian Federation of Natural Language Processing and The Association for Computational Linguistics",
url = "https://aclanthology.org/2025.ijcnlp-long.166/",
pages = "3098--3123",
ISBN = "979-8-89176-298-5",
abstract = "Temporal question answering (TQA) remains a persistent challenge for large language models (LLMs), particularly in retrieval-augmented generation (RAG) settings where retrieved content may be irrelevant, outdated, or temporally inconsistent. This is especially critical in applications like clinical event ordering, policy tracking, and real-time decision-making, which require reliable temporal reasoning even under noisy or misleading context. To address this challenge, we introduce RASTeR: Robust, Agentic, and Structured, Temporal Reasoning, an agentic prompting framework that separates context evaluation from answer generation. RASTeR first assesses the relevance and temporal coherence of retrieved context, then constructs a structured temporal knowledge graph (TKG) to better facilitate reasoning. When inconsistencies are detected, RASTeR selectively corrects or discards context before generating an answer. Across multiple datasets and LLMs, RASTeR consistently improves robustness: defined here as the model{'}s ability to generate correct predictions despite suboptimal context. We further validate our approach through a ``needle-in-the-haystack'' study, in which relevant context is buried among irrelevant distractors. Even with forty distractors, RASTeR achieves 75{\%} accuracy, compared to the runner-up model, which reaches only 62{\%}."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="schumacher-etal-2025-raster">
<titleInfo>
<title>RASTeR: Robust, Agentic, and Structured Temporal Reasoning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dan</namePart>
<namePart type="family">Schumacher</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fatemeh</namePart>
<namePart type="family">Haji</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tara</namePart>
<namePart type="family">Grey</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Niharika</namePart>
<namePart type="family">Bandlamudi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nupoor</namePart>
<namePart type="family">Karnik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gagana</namePart>
<namePart type="given">Uday</namePart>
<namePart type="family">Kumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cho-Yu</namePart>
<namePart type="given">Jason</namePart>
<namePart type="family">Chiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Peyman</namePart>
<namePart type="family">Najafirad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nishant</namePart>
<namePart type="family">Vishwamitra</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anthony</namePart>
<namePart type="family">Rios</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 14th International Joint Conference on Natural Language Processing and the 4th Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kentaro</namePart>
<namePart type="family">Inui</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Haofen</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Derek</namePart>
<namePart type="given">F</namePart>
<namePart type="family">Wong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pushpak</namePart>
<namePart type="family">Bhattacharyya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Biplab</namePart>
<namePart type="family">Banerjee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asif</namePart>
<namePart type="family">Ekbal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tanmoy</namePart>
<namePart type="family">Chakraborty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dhirendra</namePart>
<namePart type="given">Pratap</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>The Asian Federation of Natural Language Processing and The Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mumbai, India</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-298-5</identifier>
</relatedItem>
<abstract>Temporal question answering (TQA) remains a persistent challenge for large language models (LLMs), particularly in retrieval-augmented generation (RAG) settings where retrieved content may be irrelevant, outdated, or temporally inconsistent. This is especially critical in applications like clinical event ordering, policy tracking, and real-time decision-making, which require reliable temporal reasoning even under noisy or misleading context. To address this challenge, we introduce RASTeR: Robust, Agentic, and Structured, Temporal Reasoning, an agentic prompting framework that separates context evaluation from answer generation. RASTeR first assesses the relevance and temporal coherence of retrieved context, then constructs a structured temporal knowledge graph (TKG) to better facilitate reasoning. When inconsistencies are detected, RASTeR selectively corrects or discards context before generating an answer. Across multiple datasets and LLMs, RASTeR consistently improves robustness: defined here as the model’s ability to generate correct predictions despite suboptimal context. We further validate our approach through a “needle-in-the-haystack” study, in which relevant context is buried among irrelevant distractors. Even with forty distractors, RASTeR achieves 75% accuracy, compared to the runner-up model, which reaches only 62%.</abstract>
<identifier type="citekey">schumacher-etal-2025-raster</identifier>
<location>
<url>https://aclanthology.org/2025.ijcnlp-long.166/</url>
</location>
<part>
<date>2025-12</date>
<extent unit="page">
<start>3098</start>
<end>3123</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T RASTeR: Robust, Agentic, and Structured Temporal Reasoning
%A Schumacher, Dan
%A Haji, Fatemeh
%A Grey, Tara
%A Bandlamudi, Niharika
%A Karnik, Nupoor
%A Kumar, Gagana Uday
%A Chiang, Cho-Yu Jason
%A Najafirad, Peyman
%A Vishwamitra, Nishant
%A Rios, Anthony
%Y Inui, Kentaro
%Y Sakti, Sakriani
%Y Wang, Haofen
%Y Wong, Derek F.
%Y Bhattacharyya, Pushpak
%Y Banerjee, Biplab
%Y Ekbal, Asif
%Y Chakraborty, Tanmoy
%Y Singh, Dhirendra Pratap
%S Proceedings of the 14th International Joint Conference on Natural Language Processing and the 4th Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics
%D 2025
%8 December
%I The Asian Federation of Natural Language Processing and The Association for Computational Linguistics
%C Mumbai, India
%@ 979-8-89176-298-5
%F schumacher-etal-2025-raster
%X Temporal question answering (TQA) remains a persistent challenge for large language models (LLMs), particularly in retrieval-augmented generation (RAG) settings where retrieved content may be irrelevant, outdated, or temporally inconsistent. This is especially critical in applications like clinical event ordering, policy tracking, and real-time decision-making, which require reliable temporal reasoning even under noisy or misleading context. To address this challenge, we introduce RASTeR: Robust, Agentic, and Structured, Temporal Reasoning, an agentic prompting framework that separates context evaluation from answer generation. RASTeR first assesses the relevance and temporal coherence of retrieved context, then constructs a structured temporal knowledge graph (TKG) to better facilitate reasoning. When inconsistencies are detected, RASTeR selectively corrects or discards context before generating an answer. Across multiple datasets and LLMs, RASTeR consistently improves robustness: defined here as the model’s ability to generate correct predictions despite suboptimal context. We further validate our approach through a “needle-in-the-haystack” study, in which relevant context is buried among irrelevant distractors. Even with forty distractors, RASTeR achieves 75% accuracy, compared to the runner-up model, which reaches only 62%.
%U https://aclanthology.org/2025.ijcnlp-long.166/
%P 3098-3123
Markdown (Informal)
[RASTeR: Robust, Agentic, and Structured Temporal Reasoning](https://aclanthology.org/2025.ijcnlp-long.166/) (Schumacher et al., IJCNLP-AACL 2025)
ACL
Dan Schumacher, Fatemeh Haji, Tara Grey, Niharika Bandlamudi, Nupoor Karnik, Gagana Uday Kumar, Cho-Yu Jason Chiang, Peyman Najafirad, Nishant Vishwamitra, and Anthony Rios. 2025. RASTeR: Robust, Agentic, and Structured Temporal Reasoning. In Proceedings of the 14th International Joint Conference on Natural Language Processing and the 4th Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics, pages 3098–3123, Mumbai, India. The Asian Federation of Natural Language Processing and The Association for Computational Linguistics.