% Workshop paper (PracticalD2T 2024); the URL below points at its ACL Anthology record.
@inproceedings{albornoz-de-luise-etal-2024-beyond,
  title     = {Beyond the Hype: Identifying and Analyzing Math Word Problem-Solving Challenges for Large Language Models},
  author    = {Albornoz-De Luise, Romina Soledad and
               Arnau, David and
               Arnau-Gonz{\'a}lez, Pablo and
               Arevalillo-Herr{\'a}ez, Miguel},
  editor    = {Balloccu, Simone and
               Kasner, Zden{\v{e}}k and
               Pl{\'a}tek, Ond{\v{r}}ej and
               Schmidtov{\'a}, Patr{\'\i}cia and
               Onderkov{\'a}, Krist{\'y}na and
               Lango, Mateusz and
               Du{\v{s}}ek, Ond{\v{r}}ej and
               Flek, Lucie and
               Reiter, Ehud and
               Gkatzia, Dimitra and
               Mille, Simon},
  booktitle = {Proceedings of the 2nd Workshop on Practical LLM-assisted Data-to-Text Generation},
  month     = sep,
  year      = {2024},
  address   = {Tokyo, Japan},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.practicald2t-1.1},
  pages     = {1--6},
  abstract  = {Despite not being explicitly trained for this purpose, models like Mistral and LLaMA have demonstrated impressive results across numerous tasks, including generating solutions to Mathematical Word Problems (MWPs). A MWP involves translating a textual description into a mathematical model or equation that solving it. However, these models face challenges in accurately interpreting and utilizing the numerical information present in the MWP statements, which can lead to errors in the generated solutions. To better understand the limitations of LLMs, we analyzed the MWP where models failed to accurately solve problems from the SVAMP dataset. By categorizing these MWPs, we identify specific types of problems where the models are most prone to errors, providing insights into the underlying challenges faced by LLMs in problem-solving scenarios and open new modeling opportunities. By understanding the expected errors, researchers can design strategies to adequately model problems more effectively and choose the most suitable LLM for solving them taking into account each model{'}s strengths and weaknesses.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record for one workshop paper. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="albornoz-de-luise-etal-2024-beyond">
    <titleInfo>
      <title>Beyond the Hype: Identifying and Analyzing Math Word Problem-Solving Challenges for Large Language Models</title>
    </titleInfo>
    <!-- Paper authors, in listed order. -->
    <name type="personal">
      <namePart type="given">Romina</namePart>
      <namePart type="given">Soledad</namePart>
      <namePart type="family">Albornoz-De Luise</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">David</namePart>
      <namePart type="family">Arnau</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Pablo</namePart>
      <namePart type="family">Arnau-González</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Miguel</namePart>
      <namePart type="family">Arevalillo-Herráez</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-09</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors and publisher details. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2nd Workshop on Practical LLM-assisted Data-to-Text Generation</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Simone</namePart>
        <namePart type="family">Balloccu</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Zdeněk</namePart>
        <namePart type="family">Kasner</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ondřej</namePart>
        <namePart type="family">Plátek</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Patrícia</namePart>
        <namePart type="family">Schmidtová</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Kristýna</namePart>
        <namePart type="family">Onderková</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mateusz</namePart>
        <namePart type="family">Lango</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ondřej</namePart>
        <namePart type="family">Dušek</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Lucie</namePart>
        <namePart type="family">Flek</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ehud</namePart>
        <namePart type="family">Reiter</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Dimitra</namePart>
        <namePart type="family">Gkatzia</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Simon</namePart>
        <namePart type="family">Mille</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Tokyo, Japan</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Despite not being explicitly trained for this purpose, models like Mistral and LLaMA have demonstrated impressive results across numerous tasks, including generating solutions to Mathematical Word Problems (MWPs). A MWP involves translating a textual description into a mathematical model or equation that solving it. However, these models face challenges in accurately interpreting and utilizing the numerical information present in the MWP statements, which can lead to errors in the generated solutions. To better understand the limitations of LLMs, we analyzed the MWP where models failed to accurately solve problems from the SVAMP dataset. By categorizing these MWPs, we identify specific types of problems where the models are most prone to errors, providing insights into the underlying challenges faced by LLMs in problem-solving scenarios and open new modeling opportunities. By understanding the expected errors, researchers can design strategies to adequately model problems more effectively and choose the most suitable LLM for solving them taking into account each model’s strengths and weaknesses.</abstract>
    <!-- The citekey mirrors the ID attribute on the enclosing mods element. -->
    <identifier type="citekey">albornoz-de-luise-etal-2024-beyond</identifier>
    <location>
      <url>https://aclanthology.org/2024.practicald2t-1.1</url>
    </location>
    <part>
      <date>2024-09</date>
      <extent unit="page">
        <start>1</start>
        <end>6</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Beyond the Hype: Identifying and Analyzing Math Word Problem-Solving Challenges for Large Language Models
%A Albornoz-De Luise, Romina Soledad
%A Arnau, David
%A Arnau-González, Pablo
%A Arevalillo-Herráez, Miguel
%Y Balloccu, Simone
%Y Kasner, Zdeněk
%Y Plátek, Ondřej
%Y Schmidtová, Patrícia
%Y Onderková, Kristýna
%Y Lango, Mateusz
%Y Dušek, Ondřej
%Y Flek, Lucie
%Y Reiter, Ehud
%Y Gkatzia, Dimitra
%Y Mille, Simon
%S Proceedings of the 2nd Workshop on Practical LLM-assisted Data-to-Text Generation
%D 2024
%8 September
%I Association for Computational Linguistics
%C Tokyo, Japan
%F albornoz-de-luise-etal-2024-beyond
%X Despite not being explicitly trained for this purpose, models like Mistral and LLaMA have demonstrated impressive results across numerous tasks, including generating solutions to Mathematical Word Problems (MWPs). A MWP involves translating a textual description into a mathematical model or equation that solving it. However, these models face challenges in accurately interpreting and utilizing the numerical information present in the MWP statements, which can lead to errors in the generated solutions. To better understand the limitations of LLMs, we analyzed the MWP where models failed to accurately solve problems from the SVAMP dataset. By categorizing these MWPs, we identify specific types of problems where the models are most prone to errors, providing insights into the underlying challenges faced by LLMs in problem-solving scenarios and open new modeling opportunities. By understanding the expected errors, researchers can design strategies to adequately model problems more effectively and choose the most suitable LLM for solving them taking into account each model’s strengths and weaknesses.
%U https://aclanthology.org/2024.practicald2t-1.1
%P 1-6
Markdown (Informal)
[Beyond the Hype: Identifying and Analyzing Math Word Problem-Solving Challenges for Large Language Models](https://aclanthology.org/2024.practicald2t-1.1) (Albornoz-De Luise et al., PracticalD2T-WS 2024)
ACL