@inproceedings{rizvi-etal-2024-sparc,
title = "{S}pa{RC} and {S}pa{RP}: Spatial Reasoning Characterization and Path Generation for Understanding Spatial Reasoning Capability of Large Language Models",
author = "Rizvi, Md Imbesat and
Zhu, Xiaodan and
Gurevych, Iryna",
editor = "Ku, Lun-Wei and
Martins, Andre and
Srikumar, Vivek",
booktitle = "Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.acl-long.261",
doi = "10.18653/v1/2024.acl-long.261",
pages = "4750--4767",
abstract = "Spatial reasoning is a crucial component of both biological and artificial intelligence. In this work, we present a comprehensive study of the capability of current state-of-the-art large language models (LLMs) on spatial reasoning. To support our study, we created and contribute a novel Spatial Reasoning Characterization (SpaRC) framework and Spatial Reasoning Paths (SpaRP) datasets, to enable an in-depth understanding of the spatial relations and compositions as well as the usefulness of spatial reasoning chains. We found that all the state-of-the-art LLMs do not perform well on the datasets{---}their performances are consistently low across different setups. The spatial reasoning capability improves substantially as model sizes scale up. Finetuning both large language models (e.g., Llama-2-70B) and smaller ones (e.g., Llama-2-13B) can significantly improve their F1-scores by 7{--}32 absolute points. We also found that the top proprietary LLMs still significantly outperform their open-source counterparts in topological spatial understanding and reasoning.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="rizvi-etal-2024-sparc">
<titleInfo>
<title>SpaRC and SpaRP: Spatial Reasoning Characterization and Path Generation for Understanding Spatial Reasoning Capability of Large Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Md</namePart>
<namePart type="given">Imbesat</namePart>
<namePart type="family">Rizvi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaodan</namePart>
<namePart type="family">Zhu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Iryna</namePart>
<namePart type="family">Gurevych</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lun-Wei</namePart>
<namePart type="family">Ku</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andre</namePart>
<namePart type="family">Martins</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vivek</namePart>
<namePart type="family">Srikumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Spatial reasoning is a crucial component of both biological and artificial intelligence. In this work, we present a comprehensive study of the capability of current state-of-the-art large language models (LLMs) on spatial reasoning. To support our study, we created and contribute a novel Spatial Reasoning Characterization (SpaRC) framework and Spatial Reasoning Paths (SpaRP) datasets, to enable an in-depth understanding of the spatial relations and compositions as well as the usefulness of spatial reasoning chains. We found that all the state-of-the-art LLMs do not perform well on the datasets—their performances are consistently low across different setups. The spatial reasoning capability improves substantially as model sizes scale up. Finetuning both large language models (e.g., Llama-2-70B) and smaller ones (e.g., Llama-2-13B) can significantly improve their F1-scores by 7–32 absolute points. We also found that the top proprietary LLMs still significantly outperform their open-source counterparts in topological spatial understanding and reasoning.</abstract>
<identifier type="citekey">rizvi-etal-2024-sparc</identifier>
<identifier type="doi">10.18653/v1/2024.acl-long.261</identifier>
<location>
<url>https://aclanthology.org/2024.acl-long.261</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>4750</start>
<end>4767</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T SpaRC and SpaRP: Spatial Reasoning Characterization and Path Generation for Understanding Spatial Reasoning Capability of Large Language Models
%A Rizvi, Md Imbesat
%A Zhu, Xiaodan
%A Gurevych, Iryna
%Y Ku, Lun-Wei
%Y Martins, Andre
%Y Srikumar, Vivek
%S Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F rizvi-etal-2024-sparc
%X Spatial reasoning is a crucial component of both biological and artificial intelligence. In this work, we present a comprehensive study of the capability of current state-of-the-art large language models (LLMs) on spatial reasoning. To support our study, we created and contribute a novel Spatial Reasoning Characterization (SpaRC) framework and Spatial Reasoning Paths (SpaRP) datasets, to enable an in-depth understanding of the spatial relations and compositions as well as the usefulness of spatial reasoning chains. We found that all the state-of-the-art LLMs do not perform well on the datasets—their performances are consistently low across different setups. The spatial reasoning capability improves substantially as model sizes scale up. Finetuning both large language models (e.g., Llama-2-70B) and smaller ones (e.g., Llama-2-13B) can significantly improve their F1-scores by 7–32 absolute points. We also found that the top proprietary LLMs still significantly outperform their open-source counterparts in topological spatial understanding and reasoning.
%R 10.18653/v1/2024.acl-long.261
%U https://aclanthology.org/2024.acl-long.261
%U https://doi.org/10.18653/v1/2024.acl-long.261
%P 4750-4767
Markdown (Informal)
[SpaRC and SpaRP: Spatial Reasoning Characterization and Path Generation for Understanding Spatial Reasoning Capability of Large Language Models](https://aclanthology.org/2024.acl-long.261) (Rizvi et al., ACL 2024)
ACL