@inproceedings{figueroa-rosero-etal-2025-evaluating,
title = "Evaluating Financial Literacy of Large Language Models through Domain Specific Languages for Plain Text Accounting",
author = "Figueroa Rosero, Alexei Gustavo and
Grundmann, Paul and
Freidank, Julius and
Nejdl, Wolfgang and
Loeser, Alexander",
editor = "Chen, Chung-Chi and
Moreno-Sandoval, Antonio and
Huang, Jimin and
Xie, Qianqian and
Ananiadou, Sophia and
Chen, Hsin-Hsi",
booktitle = "Proceedings of the Joint Workshop of the 9th Financial Technology and Natural Language Processing (FinNLP), the 6th Financial Narrative Processing (FNP), and the 1st Workshop on Large Language Models for Finance and Legal (LLMFinLegal)",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.finnlp-1.6/",
pages = "63--75",
abstract = "Large language models (LLMs) have proven highly effective for a wide range of tasks, including code generation. Recently, advancements in their capabilities have shown promise in areas like mathematical reasoning, chain-of-thought processes and self-reflection. However, their effectiveness in domains requiring nuanced understanding of financial contexts, such as accounting, remains unclear. In this study, we evaluate how well LLMs perform in generating code for domain-specific languages (DSLs) in accounting, using Beancount as a case study. We create a set of tasks based on common financial ratios, to evaluate the numeracy and financial literacy of LLMs. Our findings reveal that while LLMs are state-of-the art in generative tasks, they struggle severely with accounting, often producing inaccurate calculations and misinterpreting financial scenarios. We characterize these shortcomings through a comprehensive evaluation, shedding light on the limitations of LLMs in understanding and handling money-related tasks."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="figueroa-rosero-etal-2025-evaluating">
<titleInfo>
<title>Evaluating Financial Literacy of Large Language Models through Domain Specific Languages for Plain Text Accounting</title>
</titleInfo>
<name type="personal">
<namePart type="given">Alexei</namePart>
<namePart type="given">Gustavo</namePart>
<namePart type="family">Figueroa Rosero</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Paul</namePart>
<namePart type="family">Grundmann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Julius</namePart>
<namePart type="family">Freidank</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wolfgang</namePart>
<namePart type="family">Nejdl</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexander</namePart>
<namePart type="family">Loeser</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Joint Workshop of the 9th Financial Technology and Natural Language Processing (FinNLP), the 6th Financial Narrative Processing (FNP), and the 1st Workshop on Large Language Models for Finance and Legal (LLMFinLegal)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Chung-Chi</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Antonio</namePart>
<namePart type="family">Moreno-Sandoval</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jimin</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Qianqian</namePart>
<namePart type="family">Xie</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sophia</namePart>
<namePart type="family">Ananiadou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hsin-Hsi</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Large language models (LLMs) have proven highly effective for a wide range of tasks, including code generation. Recently, advancements in their capabilities have shown promise in areas like mathematical reasoning, chain-of-thought processes and self-reflection. However, their effectiveness in domains requiring nuanced understanding of financial contexts, such as accounting, remains unclear. In this study, we evaluate how well LLMs perform in generating code for domain-specific languages (DSLs) in accounting, using Beancount as a case study. We create a set of tasks based on common financial ratios, to evaluate the numeracy and financial literacy of LLMs. Our findings reveal that while LLMs are state-of-the art in generative tasks, they struggle severely with accounting, often producing inaccurate calculations and misinterpreting financial scenarios. We characterize these shortcomings through a comprehensive evaluation, shedding light on the limitations of LLMs in understanding and handling money-related tasks.</abstract>
<identifier type="citekey">figueroa-rosero-etal-2025-evaluating</identifier>
<location>
<url>https://aclanthology.org/2025.finnlp-1.6/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>63</start>
<end>75</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Evaluating Financial Literacy of Large Language Models through Domain Specific Languages for Plain Text Accounting
%A Figueroa Rosero, Alexei Gustavo
%A Grundmann, Paul
%A Freidank, Julius
%A Nejdl, Wolfgang
%A Loeser, Alexander
%Y Chen, Chung-Chi
%Y Moreno-Sandoval, Antonio
%Y Huang, Jimin
%Y Xie, Qianqian
%Y Ananiadou, Sophia
%Y Chen, Hsin-Hsi
%S Proceedings of the Joint Workshop of the 9th Financial Technology and Natural Language Processing (FinNLP), the 6th Financial Narrative Processing (FNP), and the 1st Workshop on Large Language Models for Finance and Legal (LLMFinLegal)
%D 2025
%8 January
%I Association for Computational Linguistics
%C Abu Dhabi, UAE
%F figueroa-rosero-etal-2025-evaluating
%X Large language models (LLMs) have proven highly effective for a wide range of tasks, including code generation. Recently, advancements in their capabilities have shown promise in areas like mathematical reasoning, chain-of-thought processes and self-reflection. However, their effectiveness in domains requiring nuanced understanding of financial contexts, such as accounting, remains unclear. In this study, we evaluate how well LLMs perform in generating code for domain-specific languages (DSLs) in accounting, using Beancount as a case study. We create a set of tasks based on common financial ratios, to evaluate the numeracy and financial literacy of LLMs. Our findings reveal that while LLMs are state-of-the art in generative tasks, they struggle severely with accounting, often producing inaccurate calculations and misinterpreting financial scenarios. We characterize these shortcomings through a comprehensive evaluation, shedding light on the limitations of LLMs in understanding and handling money-related tasks.
%U https://aclanthology.org/2025.finnlp-1.6/
%P 63-75
Markdown (Informal)
[Evaluating Financial Literacy of Large Language Models through Domain Specific Languages for Plain Text Accounting](https://aclanthology.org/2025.finnlp-1.6/) (Figueroa Rosero et al., FinNLP 2025)
ACL