@inproceedings{xie-etal-2026-gr1,
title = "{GR}1: Reinforcement-Enhanced {LLM} for Geoscience Reasoning",
author = "Xie, Yule and
Ding, Jiaxin and
Deng, Cheng and
Gao, Shiqing and
Zhang, Junran and
Zhang, Sibo and
Wang, Zeyuan and
Wu, Ke and
Ding, Xin and
Fu, Luoyi and
Jin, Meng and
Wang, Xinbing",
editor = "Liakata, Maria and
Moreira, Viviane P. and
Zhang, Jiajun and
Jurgens, David",
booktitle = "Findings of the {A}ssociation for {C}omputational {L}inguistics: {ACL} 2026",
month = jul,
year = "2026",
address = "San Diego, California, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.findings-acl.1140/",
pages = "22730--22743",
ISBN = "979-8-89176-395-1",
abstract = "Reinforcement learning (RL) has recently shown remarkable ability to enhance reasoning in large language models (LLMs), yet its potential in scientific domains beyond mathematics remains largely unexplored. Geoscience questions couple broad factual knowledge with multi-step inference and often rely on visual evidence such as maps, cross-sections, and diagrams, making them a challenging but verifiable testbed for RL-based reasoning. To enable this study, we introduce GeoMC-10K, a dataset of 10,000 geoscience multiple-choice questions spanning physical to human geography and high-school to professional levels; over 30{\%} of the questions are image dependent. To support text-only RL on these multimodal questions, we design GeoM2T, a multi-agent framework that converts multimodal questions into descriptive text while preserving answerability and difficulty. Fine-tuning LLaMA-3.1-8B and Qwen-3-8B with Group Relative Policy Optimization (GRPO), incorporating a factual reward mechanism, yields GR1, which achieves absolute accuracy improvements of 5.9{\%} and 13.3{\%}, respectively, and it generalizes to out-of-distribution geoscience benchmarks. Together, GeoMC-10K, GeoM2T, and GR1 establish a scalable benchmark and baseline for RL-enhanced geoscience reasoning."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="xie-etal-2026-gr1">
<titleInfo>
<title>GR1: Reinforcement-Enhanced LLM for Geoscience Reasoning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yule</namePart>
<namePart type="family">Xie</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiaxin</namePart>
<namePart type="family">Ding</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cheng</namePart>
<namePart type="family">Deng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shiqing</namePart>
<namePart type="family">Gao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junran</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sibo</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zeyuan</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ke</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xin</namePart>
<namePart type="family">Ding</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luoyi</namePart>
<namePart type="family">Fu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Meng</namePart>
<namePart type="family">Jin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xinbing</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2026</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Liakata</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Viviane</namePart>
<namePart type="given">P</namePart>
<namePart type="family">Moreira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiajun</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Jurgens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-395-1</identifier>
</relatedItem>
<abstract>Reinforcement learning (RL) has recently shown remarkable ability to enhance reasoning in large language models (LLMs), yet its potential in scientific domains beyond mathematics remains largely unexplored. Geoscience questions couple broad factual knowledge with multi-step inference and often rely on visual evidence such as maps, cross-sections, and diagrams, making them a challenging but verifiable testbed for RL-based reasoning. To enable this study, we introduce GeoMC-10K, a dataset of 10,000 geoscience multiple-choice questions spanning physical to human geography and high-school to professional levels; over 30% of the questions are image dependent. To support text-only RL on these multimodal questions, we design GeoM2T, a multi-agent framework that converts multimodal questions into descriptive text while preserving answerability and difficulty. Fine-tuning LLaMA-3.1-8B and Qwen-3-8B with Group Relative Policy Optimization (GRPO), incorporating a factual reward mechanism, yields GR1, which achieves absolute accuracy improvements of 5.9% and 13.3%, respectively, and it generalizes to out-of-distribution geoscience benchmarks. Together, GeoMC-10K, GeoM2T, and GR1 establish a scalable benchmark and baseline for RL-enhanced geoscience reasoning.</abstract>
<identifier type="citekey">xie-etal-2026-gr1</identifier>
<location>
<url>https://aclanthology.org/2026.findings-acl.1140/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>22730</start>
<end>22743</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T GR1: Reinforcement-Enhanced LLM for Geoscience Reasoning
%A Xie, Yule
%A Ding, Jiaxin
%A Deng, Cheng
%A Gao, Shiqing
%A Zhang, Junran
%A Zhang, Sibo
%A Wang, Zeyuan
%A Wu, Ke
%A Ding, Xin
%A Fu, Luoyi
%A Jin, Meng
%A Wang, Xinbing
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Findings of the Association for Computational Linguistics: ACL 2026
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-395-1
%F xie-etal-2026-gr1
%X Reinforcement learning (RL) has recently shown remarkable ability to enhance reasoning in large language models (LLMs), yet its potential in scientific domains beyond mathematics remains largely unexplored. Geoscience questions couple broad factual knowledge with multi-step inference and often rely on visual evidence such as maps, cross-sections, and diagrams, making them a challenging but verifiable testbed for RL-based reasoning. To enable this study, we introduce GeoMC-10K, a dataset of 10,000 geoscience multiple-choice questions spanning physical to human geography and high-school to professional levels; over 30% of the questions are image dependent. To support text-only RL on these multimodal questions, we design GeoM2T, a multi-agent framework that converts multimodal questions into descriptive text while preserving answerability and difficulty. Fine-tuning LLaMA-3.1-8B and Qwen-3-8B with Group Relative Policy Optimization (GRPO), incorporating a factual reward mechanism, yields GR1, which achieves absolute accuracy improvements of 5.9% and 13.3%, respectively, and it generalizes to out-of-distribution geoscience benchmarks. Together, GeoMC-10K, GeoM2T, and GR1 establish a scalable benchmark and baseline for RL-enhanced geoscience reasoning.
%U https://aclanthology.org/2026.findings-acl.1140/
%P 22730-22743
Markdown (Informal)
[GR1: Reinforcement-Enhanced LLM for Geoscience Reasoning](https://aclanthology.org/2026.findings-acl.1140/) (Xie et al., Findings 2026)
ACL
- Yule Xie, Jiaxin Ding, Cheng Deng, Shiqing Gao, Junran Zhang, Sibo Zhang, Zeyuan Wang, Ke Wu, Xin Ding, Luoyi Fu, Meng Jin, and Xinbing Wang. 2026. GR1: Reinforcement-Enhanced LLM for Geoscience Reasoning. In Findings of the Association for Computational Linguistics: ACL 2026, pages 22730–22743, San Diego, California, United States. Association for Computational Linguistics.