BibTeX
@inproceedings{lee-etal-2025-automatic,
    title = "Automatic Mathematic In-Context Example Generation for {LLM} Using Multi-Modal Consistency",
    author = "Lee, Jaeseong and
      Yang, Wei and
      Gupta, Gopal and
      Wei, Shiyi",
    editor = "Rambow, Owen and
      Wanner, Leo and
      Apidianaki, Marianna and
      Al-Khalifa, Hend and
      Di Eugenio, Barbara and
      Schockaert, Steven",
    booktitle = "Proceedings of the 31st International Conference on Computational Linguistics",
    month = jan,
    year = "2025",
    address = "Abu Dhabi, UAE",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2025.coling-main.597/",
    pages = "8908--8924",
    abstract = "Large Language Models (LLMs) have advanced Natural Language Processing (NLP) tasks but are limited in mathematical reasoning. To address this, few-shot examples are used in prompts for in-context learning. However, existing methods require annotated datasets, resulting in higher computational costs and lower quality examples. To mitigate these limitations, we propose AutoMathIC, a framework that automatically generates high-quality in-context examples to enhance LLMs' mathematical reasoning. AutoMathIC ensures consistency across different modalities (e.g., Chain-of-Thought (CoT), code snippets, and equations) by generating and selecting mutations that improve response consistency. Evaluated on four math problem datasets, AutoMathIC outperforms six baselines, with LLM accuracy ranging from 87.0{\%} to 99.3{\%} for GPT-3.5 and 93.1{\%} to 98.7{\%} for GPT-4o-mini. It surpasses the state-of-the-art in-context example retrieval method in three of the four datasets by 0.3{\%} to 11.8{\%}, without relying on an annotated dataset."
}
MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="lee-etal-2025-automatic">
  <titleInfo>
    <title>Automatic Mathematic In-Context Example Generation for LLM Using Multi-Modal Consistency</title>
  </titleInfo>
  <name type="personal">
    <namePart type="given">Jaeseong</namePart>
    <namePart type="family">Lee</namePart>
    <role>
      <roleTerm authority="marcrelator" type="text">author</roleTerm>
    </role>
  </name>
  <name type="personal">
    <namePart type="given">Wei</namePart>
    <namePart type="family">Yang</namePart>
    <role>
      <roleTerm authority="marcrelator" type="text">author</roleTerm>
    </role>
  </name>
  <name type="personal">
    <namePart type="given">Gopal</namePart>
    <namePart type="family">Gupta</namePart>
    <role>
      <roleTerm authority="marcrelator" type="text">author</roleTerm>
    </role>
  </name>
  <name type="personal">
    <namePart type="given">Shiyi</namePart>
    <namePart type="family">Wei</namePart>
    <role>
      <roleTerm authority="marcrelator" type="text">author</roleTerm>
    </role>
  </name>
  <originInfo>
    <dateIssued>2025-01</dateIssued>
  </originInfo>
  <typeOfResource>text</typeOfResource>
  <relatedItem type="host">
    <titleInfo>
      <title>Proceedings of the 31st International Conference on Computational Linguistics</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Owen</namePart>
      <namePart type="family">Rambow</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">editor</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Leo</namePart>
      <namePart type="family">Wanner</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">editor</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Marianna</namePart>
      <namePart type="family">Apidianaki</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">editor</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Hend</namePart>
      <namePart type="family">Al-Khalifa</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">editor</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Barbara</namePart>
      <namePart type="family">Di Eugenio</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">editor</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Steven</namePart>
      <namePart type="family">Schockaert</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">editor</roleTerm>
      </role>
    </name>
    <originInfo>
      <publisher>Association for Computational Linguistics</publisher>
      <place>
        <placeTerm type="text">Abu Dhabi, UAE</placeTerm>
      </place>
    </originInfo>
    <genre authority="marcgt">conference publication</genre>
  </relatedItem>
  <abstract>Large Language Models (LLMs) have advanced Natural Language Processing (NLP) tasks but are limited in mathematical reasoning. To address this, few-shot examples are used in prompts for in-context learning. However, existing methods require annotated datasets, resulting in higher computational costs and lower quality examples. To mitigate these limitations, we propose AutoMathIC, a framework that automatically generates high-quality in-context examples to enhance LLMs’ mathematical reasoning. AutoMathIC ensures consistency across different modalities (e.g., Chain-of-Thought (CoT), code snippets, and equations) by generating and selecting mutations that improve response consistency. Evaluated on four math problem datasets, AutoMathIC outperforms six baselines, with LLM accuracy ranging from 87.0% to 99.3% for GPT-3.5 and 93.1% to 98.7% for GPT-4o-mini. It surpasses the state-of-the-art in-context example retrieval method in three of the four datasets by 0.3% to 11.8%, without relying on an annotated dataset.</abstract>
  <identifier type="citekey">lee-etal-2025-automatic</identifier>
  <location>
    <url>https://aclanthology.org/2025.coling-main.597/</url>
  </location>
  <part>
    <date>2025-01</date>
    <extent unit="page">
      <start>8908</start>
      <end>8924</end>
    </extent>
  </part>
</mods>
</modsCollection>
Endnote
%0 Conference Proceedings
%T Automatic Mathematic In-Context Example Generation for LLM Using Multi-Modal Consistency
%A Lee, Jaeseong
%A Yang, Wei
%A Gupta, Gopal
%A Wei, Shiyi
%Y Rambow, Owen
%Y Wanner, Leo
%Y Apidianaki, Marianna
%Y Al-Khalifa, Hend
%Y Di Eugenio, Barbara
%Y Schockaert, Steven
%S Proceedings of the 31st International Conference on Computational Linguistics
%D 2025
%8 January
%I Association for Computational Linguistics
%C Abu Dhabi, UAE
%F lee-etal-2025-automatic
%X Large Language Models (LLMs) have advanced Natural Language Processing (NLP) tasks but are limited in mathematical reasoning. To address this, few-shot examples are used in prompts for in-context learning. However, existing methods require annotated datasets, resulting in higher computational costs and lower quality examples. To mitigate these limitations, we propose AutoMathIC, a framework that automatically generates high-quality in-context examples to enhance LLMs’ mathematical reasoning. AutoMathIC ensures consistency across different modalities (e.g., Chain-of-Thought (CoT), code snippets, and equations) by generating and selecting mutations that improve response consistency. Evaluated on four math problem datasets, AutoMathIC outperforms six baselines, with LLM accuracy ranging from 87.0% to 99.3% for GPT-3.5 and 93.1% to 98.7% for GPT-4o-mini. It surpasses the state-of-the-art in-context example retrieval method in three of the four datasets by 0.3% to 11.8%, without relying on an annotated dataset.
%U https://aclanthology.org/2025.coling-main.597/
%P 8908-8924
Markdown (Informal)
[Automatic Mathematic In-Context Example Generation for LLM Using Multi-Modal Consistency](https://aclanthology.org/2025.coling-main.597/) (Lee et al., COLING 2025)
ACL
Jaeseong Lee, Wei Yang, Gopal Gupta, and Shiyi Wei. 2025. Automatic Mathematic In-Context Example Generation for LLM Using Multi-Modal Consistency. In Proceedings of the 31st International Conference on Computational Linguistics, pages 8908–8924, Abu Dhabi, UAE. Association for Computational Linguistics.