@inproceedings{ghosh-etal-2025-infogen,
title = "Infogen: Generating Complex Statistical Infographics from Documents",
author = "Ghosh, Akash and
Garimella, Aparna and
Ramu, Pritika and
Bandyopadhyay, Sambaran and
Saha, Sriparna",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.acl-long.1003/",
doi = "10.18653/v1/2025.acl-long.1003",
pages = "20552--20570",
ISBN = "979-8-89176-251-0",
abstract = "Statistical infographics are powerful tools that simplify complex data into visually engaging and easy-to-understand formats. Despite advancements in AI, particularly with LLMs, existing efforts have been limited to generating simple charts, with no prior work addressing the creation of complex infographics from text-heavy documents that demand a deep understanding of the content. We address this gap by introducing the task of generating \textit{statistical infographics} composed of multiple sub-charts (e.g., line, bar, pie) that are contextually accurate, insightful, and visually aligned. To achieve this, we define infographic metadata, that includes its title and textual insights, along with sub-chart-specific details such as their corresponding data, alignment, etc. We also present \textbf{ \textit{Infodat}}, the first benchmark dataset for text-to-infographic metadata generation, where each sample links a document to its metadata. We propose \textbf{ \textit{Infogen}}, a two-stage framework where fine-tuned LLMs first generate metadata, which is then converted into infographic code. Extensive evaluations on \textbf{ \textit{Infodat}} demonstrate that \textbf{ \textit{Infogen}} achieves state-of-the-art performance, outperforming both closed and open-source LLMs in text-to-statistical infographic generation."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ghosh-etal-2025-infogen">
<titleInfo>
<title>Infogen: Generating Complex Statistical Infographics from Documents</title>
</titleInfo>
<name type="personal">
<namePart type="given">Akash</namePart>
<namePart type="family">Ghosh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aparna</namePart>
<namePart type="family">Garimella</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pritika</namePart>
<namePart type="family">Ramu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sambaran</namePart>
<namePart type="family">Bandyopadhyay</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sriparna</namePart>
<namePart type="family">Saha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Nabende</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Taher</namePart>
<namePart type="family">Pilehvar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-251-0</identifier>
</relatedItem>
<abstract>Statistical infographics are powerful tools that simplify complex data into visually engaging and easy-to-understand formats. Despite advancements in AI, particularly with LLMs, existing efforts have been limited to generating simple charts, with no prior work addressing the creation of complex infographics from text-heavy documents that demand a deep understanding of the content. We address this gap by introducing the task of generating statistical infographics composed of multiple sub-charts (e.g., line, bar, pie) that are contextually accurate, insightful, and visually aligned. To achieve this, we define infographic metadata, that includes its title and textual insights, along with sub-chart-specific details such as their corresponding data, alignment, etc. We also present Infodat, the first benchmark dataset for text-to-infographic metadata generation, where each sample links a document to its metadata. We propose Infogen, a two-stage framework where fine-tuned LLMs first generate metadata, which is then converted into infographic code. Extensive evaluations on Infodat demonstrate that Infogen achieves state-of-the-art performance, outperforming both closed and open-source LLMs in text-to-statistical infographic generation.</abstract>
<identifier type="citekey">ghosh-etal-2025-infogen</identifier>
<identifier type="doi">10.18653/v1/2025.acl-long.1003</identifier>
<location>
<url>https://aclanthology.org/2025.acl-long.1003/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>20552</start>
<end>20570</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Infogen: Generating Complex Statistical Infographics from Documents
%A Ghosh, Akash
%A Garimella, Aparna
%A Ramu, Pritika
%A Bandyopadhyay, Sambaran
%A Saha, Sriparna
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-251-0
%F ghosh-etal-2025-infogen
%X Statistical infographics are powerful tools that simplify complex data into visually engaging and easy-to-understand formats. Despite advancements in AI, particularly with LLMs, existing efforts have been limited to generating simple charts, with no prior work addressing the creation of complex infographics from text-heavy documents that demand a deep understanding of the content. We address this gap by introducing the task of generating statistical infographics composed of multiple sub-charts (e.g., line, bar, pie) that are contextually accurate, insightful, and visually aligned. To achieve this, we define infographic metadata, that includes its title and textual insights, along with sub-chart-specific details such as their corresponding data, alignment, etc. We also present Infodat, the first benchmark dataset for text-to-infographic metadata generation, where each sample links a document to its metadata. We propose Infogen, a two-stage framework where fine-tuned LLMs first generate metadata, which is then converted into infographic code. Extensive evaluations on Infodat demonstrate that Infogen achieves state-of-the-art performance, outperforming both closed and open-source LLMs in text-to-statistical infographic generation.
%R 10.18653/v1/2025.acl-long.1003
%U https://aclanthology.org/2025.acl-long.1003/
%U https://doi.org/10.18653/v1/2025.acl-long.1003
%P 20552-20570
Markdown (Informal)
[Infogen: Generating Complex Statistical Infographics from Documents](https://aclanthology.org/2025.acl-long.1003/) (Ghosh et al., ACL 2025)
ACL
- Akash Ghosh, Aparna Garimella, Pritika Ramu, Sambaran Bandyopadhyay, and Sriparna Saha. 2025. Infogen: Generating Complex Statistical Infographics from Documents. In Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 20552–20570, Vienna, Austria. Association for Computational Linguistics.