% Workshop paper in the proceedings of the 20th Linguistic Annotation Workshop (LAW XX), pages 75--94.
% NOTE(review): "address" carries the location string exactly as given in the source record; confirm whether it is the venue or the publisher city.
@inproceedings{er-2026-math,
  title     = {{Math-DB}: A Discourse Framework for Mathematical Word Problems to Enhance {LLM} Reasoning},
  author    = {Er, Mustafa Erolcan},
  editor    = {Liu, Yang Janet and Gessler, Luke},
  booktitle = {Proceedings of the 20th Linguistic Annotation Workshop ({LAW} {XX})},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.law-main.7/},
  pages     = {75--94},
  isbn      = {979-8-89176-404-0},
  abstract  = {Large Language Models have demonstrated significant progress in solving mathematical word problems through techniques like Chain-of-Thought (CoT) prompting. However, recent research indicates that these models often rely on statistical regularities and surface-level patterns rather than true logical reasoning, leading to performance drops when faced with minor problem perturbations or irrelevant information. In this study, we introduce Math Discourse Bank (Math-DB), a novel discourse framework and annotated dataset designed to enhance LLM reasoning. Inspired by the Penn Discourse TreeBank (PDTB) and mathematics education research, Math-DB defines a hierarchy of discourse senses designed for quantitative reasoning, including categories such as Change, Combine, Compare, and Equalize. We applied this framework to the GSM-Symbolic dataset of 12,500 problems, yielding 47,815 sense-labeled discourse relations over 11,414 successfully-aligned instances (91.3{\%} pipeline yield). Our experiments demonstrate that incorporating Math-DB annotations into CoT prompts consistently improves LLM performance across various difficulty levels.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record; its ID matches the citekey identifier further down. -->
  <mods ID="er-2026-math">
    <titleInfo>
      <title>Math-DB: A Discourse Framework for Mathematical Word Problems to Enhance LLM Reasoning</title>
    </titleInfo>
    <!-- Author of the paper. -->
    <name type="personal">
      <namePart type="given">Mustafa</namePart>
      <namePart type="given">Erolcan</namePart>
      <namePart type="family">Er</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher, place and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 20th Linguistic Annotation Workshop (LAW XX)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Yang</namePart>
        <namePart type="given">Janet</namePart>
        <namePart type="family">Liu</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Luke</namePart>
        <namePart type="family">Gessler</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, California, USA</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-404-0</identifier>
    </relatedItem>
    <abstract>Large Language Models have demonstrated significant progress in solving mathematical word problems through techniques like Chain-of-Thought (CoT) prompting. However, recent research indicates that these models often rely on statistical regularities and surface-level patterns rather than true logical reasoning, leading to performance drops when faced with minor problem perturbations or irrelevant information. In this study, we introduce Math Discourse Bank (Math-DB), a novel discourse framework and annotated dataset designed to enhance LLM reasoning. Inspired by the Penn Discourse TreeBank (PDTB) and mathematics education research, Math-DB defines a hierarchy of discourse senses designed for quantitative reasoning, including categories such as Change, Combine, Compare, and Equalize. We applied this framework to the GSM-Symbolic dataset of 12,500 problems, yielding 47,815 sense-labeled discourse relations over 11,414 successfully-aligned instances (91.3% pipeline yield). Our experiments demonstrate that incorporating Math-DB annotations into CoT prompts consistently improves LLM performance across various difficulty levels.</abstract>
    <identifier type="citekey">er-2026-math</identifier>
    <location>
      <url>https://aclanthology.org/2026.law-main.7/</url>
    </location>
    <!-- Position of the paper within the host volume (page range). -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>75</start>
        <end>94</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Math-DB: A Discourse Framework for Mathematical Word Problems to Enhance LLM Reasoning
%A Er, Mustafa Erolcan
%Y Liu, Yang Janet
%Y Gessler, Luke
%S Proceedings of the 20th Linguistic Annotation Workshop (LAW XX)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, USA
%@ 979-8-89176-404-0
%F er-2026-math
%X Large Language Models have demonstrated significant progress in solving mathematical word problems through techniques like Chain-of-Thought (CoT) prompting. However, recent research indicates that these models often rely on statistical regularities and surface-level patterns rather than true logical reasoning, leading to performance drops when faced with minor problem perturbations or irrelevant information. In this study, we introduce Math Discourse Bank (Math-DB), a novel discourse framework and annotated dataset designed to enhance LLM reasoning. Inspired by the Penn Discourse TreeBank (PDTB) and mathematics education research, Math-DB defines a hierarchy of discourse senses designed for quantitative reasoning, including categories such as Change, Combine, Compare, and Equalize. We applied this framework to the GSM-Symbolic dataset of 12,500 problems, yielding 47,815 sense-labeled discourse relations over 11,414 successfully-aligned instances (91.3% pipeline yield). Our experiments demonstrate that incorporating Math-DB annotations into CoT prompts consistently improves LLM performance across various difficulty levels.
%U https://aclanthology.org/2026.law-main.7/
%P 75-94
Markdown (Informal)
[Math-DB: A Discourse Framework for Mathematical Word Problems to Enhance LLM Reasoning](https://aclanthology.org/2026.law-main.7/) (Er, LAW 2026)
ACL