@inproceedings{mutalik-etal-2025-cpiqa,
title = "{CPIQA}: Climate Paper Image Question Answering Dataset for Retrieval-Augmented Generation with Context-based Query Expansion",
author = "Mutalik, Rudra and
Panchalingam, Abiram and
Singh, Loitongbam Gyanendro and
Osborn, Timothy J. and
Hawkins, Ed and
Middleton, Stuart E.",
editor = "Dutia, Kalyan and
Henderson, Peter and
Leippold, Markus and
Manning, Christopher and
Morio, Gaku and
Muccione, Veruska and
Ni, Jingwei and
Schimanski, Tobias and
Stammbach, Dominik and
Singh, Alok and
Su, Alba (Ruiran) and
A. Vaghefi, Saeid",
booktitle = "Proceedings of the 2nd Workshop on Natural Language Processing Meets Climate Change (ClimateNLP 2025)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.climatenlp-1.15/",
doi = "10.18653/v1/2025.climatenlp-1.15",
pages = "218--232",
ISBN = "979-8-89176-259-6",
abstract = "Misinformation about climate science is a serious challenge for our society. This paper introduces CPIQA (Climate Paper Image Question-Answering), a new question-answer dataset featuring 4,551 full-text open-source academic papers in the area of climate science with 54,612 GPT-4o generated question-answer pairs. CPIQA contains four question types (numeric, figure-based, non-figure-based, reasoning), each generated using three user roles (expert, non-expert, climate sceptic). CPIQA is multimodal, incorporating information from figures and graphs with GPT-4o descriptive annotations. We describe Context-RAG, a novel method for RAG prompt decomposition and augmentation involving extracting distinct contexts for the question. Evaluation results for Context-RAG on the benchmark SPIQA dataset outperforms the previous best state of the art model in two out of three test cases. For our CPIQA dataset, Context-RAG outperforms our standard RAG baseline on all five base LLMs we tested, showing our novel contextual decomposition method can generalize to any LLM architecture. Expert evaluation of our best performing model (GPT-4o with Context-RAG) by climate science experts highlights strengths in precision and provenance tracking, particularly for figure-based and reasoning questions."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mutalik-etal-2025-cpiqa">
<titleInfo>
<title>CPIQA: Climate Paper Image Question Answering Dataset for Retrieval-Augmented Generation with Context-based Query Expansion</title>
</titleInfo>
<name type="personal">
<namePart type="given">Rudra</namePart>
<namePart type="family">Mutalik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abiram</namePart>
<namePart type="family">Panchalingam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Loitongbam</namePart>
<namePart type="given">Gyanendro</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Timothy</namePart>
<namePart type="given">J</namePart>
<namePart type="family">Osborn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ed</namePart>
<namePart type="family">Hawkins</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stuart</namePart>
<namePart type="given">E</namePart>
<namePart type="family">Middleton</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2nd Workshop on Natural Language Processing Meets Climate Change (ClimateNLP 2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kalyan</namePart>
<namePart type="family">Dutia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Peter</namePart>
<namePart type="family">Henderson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Markus</namePart>
<namePart type="family">Leippold</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christoper</namePart>
<namePart type="family">Manning</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gaku</namePart>
<namePart type="family">Morio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veruska</namePart>
<namePart type="family">Muccione</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jingwei</namePart>
<namePart type="family">Ni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tobias</namePart>
<namePart type="family">Schimanski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dominik</namePart>
<namePart type="family">Stammbach</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alok</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alba</namePart>
<namePart type="given">(Ruiran)</namePart>
<namePart type="family">Su</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Saeid</namePart>
<namePart type="family">A. Vaghefi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-259-6</identifier>
</relatedItem>
<abstract>Misinformation about climate science is a serious challenge for our society. This paper introduces CPIQA (Climate Paper Image Question-Answering), a new question-answer dataset featuring 4,551 full-text open-source academic papers in the area of climate science with 54,612 GPT-4o generated question-answer pairs. CPIQA contains four question types (numeric, figure-based, non-figure-based, reasoning), each generated using three user roles (expert, non-expert, climate sceptic). CPIQA is multimodal, incorporating information from figures and graphs with GPT-4o descriptive annotations. We describe Context-RAG, a novel method for RAG prompt decomposition and augmentation involving extracting distinct contexts for the question. Evaluation results for Context-RAG on the benchmark SPIQA dataset outperforms the previous best state of the art model in two out of three test cases. For our CPIQA dataset, Context-RAG outperforms our standard RAG baseline on all five base LLMs we tested, showing our novel contextual decomposition method can generalize to any LLM architecture. Expert evaluation of our best performing model (GPT-4o with Context-RAG) by climate science experts highlights strengths in precision and provenance tracking, particularly for figure-based and reasoning questions.</abstract>
<identifier type="citekey">mutalik-etal-2025-cpiqa</identifier>
<identifier type="doi">10.18653/v1/2025.climatenlp-1.15</identifier>
<location>
<url>https://aclanthology.org/2025.climatenlp-1.15/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>218</start>
<end>232</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T CPIQA: Climate Paper Image Question Answering Dataset for Retrieval-Augmented Generation with Context-based Query Expansion
%A Mutalik, Rudra
%A Panchalingam, Abiram
%A Singh, Loitongbam Gyanendro
%A Osborn, Timothy J.
%A Hawkins, Ed
%A Middleton, Stuart E.
%Y Dutia, Kalyan
%Y Henderson, Peter
%Y Leippold, Markus
%Y Manning, Christopher
%Y Morio, Gaku
%Y Muccione, Veruska
%Y Ni, Jingwei
%Y Schimanski, Tobias
%Y Stammbach, Dominik
%Y Singh, Alok
%Y Su, Alba (Ruiran)
%Y A. Vaghefi, Saeid
%S Proceedings of the 2nd Workshop on Natural Language Processing Meets Climate Change (ClimateNLP 2025)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-259-6
%F mutalik-etal-2025-cpiqa
%X Misinformation about climate science is a serious challenge for our society. This paper introduces CPIQA (Climate Paper Image Question-Answering), a new question-answer dataset featuring 4,551 full-text open-source academic papers in the area of climate science with 54,612 GPT-4o-generated question-answer pairs. CPIQA contains four question types (numeric, figure-based, non-figure-based, reasoning), each generated using three user roles (expert, non-expert, climate sceptic). CPIQA is multimodal, incorporating information from figures and graphs with GPT-4o descriptive annotations. We describe Context-RAG, a novel method for RAG prompt decomposition and augmentation that extracts distinct contexts for the question. On the benchmark SPIQA dataset, Context-RAG outperforms the previous state-of-the-art model in two out of three test cases. On our CPIQA dataset, Context-RAG outperforms our standard RAG baseline on all five base LLMs we tested, showing that our contextual decomposition method generalizes across LLM architectures. Evaluation of our best-performing model (GPT-4o with Context-RAG) by climate science experts highlights strengths in precision and provenance tracking, particularly for figure-based and reasoning questions.
%R 10.18653/v1/2025.climatenlp-1.15
%U https://aclanthology.org/2025.climatenlp-1.15/
%U https://doi.org/10.18653/v1/2025.climatenlp-1.15
%P 218-232
Markdown (Informal)
[CPIQA: Climate Paper Image Question Answering Dataset for Retrieval-Augmented Generation with Context-based Query Expansion](https://aclanthology.org/2025.climatenlp-1.15/) (Mutalik et al., ClimateNLP 2025)