@comment{BibTeX export of the record published at https://aclanthology.org/2025.insights-1.13/ . The address field holds the location given by the source record; it is kept as exported.}
@inproceedings{gupta-etal-2025-exploring,
  title     = {Exploring Multimodal Language Models for Sustainability Disclosure Extraction: A Comparative Study},
  author    = {Gupta, Tanay and
               Goel, Tushar and
               Verma, Ishan},
  editor    = {Drozd, Aleksandr and
               Sedoc, Jo{\~a}o and
               Tafreshi, Shabnam and
               Akula, Arjun and
               Shu, Raphael},
  booktitle = {The Sixth Workshop on Insights from Negative Results in {NLP}},
  month     = may,
  year      = {2025},
  address   = {Albuquerque, New Mexico},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.insights-1.13/},
  doi       = {10.18653/v1/2025.insights-1.13},
  pages     = {141--149},
  isbn      = {979-8-89176-240-4},
  abstract  = {Sustainability metrics have increasingly become a crucial non-financial criterion in investment decision-making. Organizations worldwide are recognizing the importance of sustainability and are proactively highlighting their efforts through specialized sustainability reports. Unlike traditional annual reports, these sustainability disclosures are typically text-heavy and are often expressed as infographics, complex tables, and charts. The non-machine-readable nature of these reports presents a significant challenge for efficient information extraction. The rapid advancement of Vision Language Models (VLMs) has raised the question whether these VLMs can address such challenges in domain specific task. In this study, we demonstrate the application of VLMs for extracting sustainability information from dedicated sustainability reports. Our experiments highlight the limitations in the performance of several open-source VLMs in extracting information about sustainability disclosures from different type of pages.}
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- One MODS record: the paper's own metadata first, then the host proceedings
       (editors, publisher, place, ISBN) under relatedItem, then abstract,
       identifiers, URL and page extent. -->
  <mods ID="gupta-etal-2025-exploring">
    <titleInfo>
      <title>Exploring Multimodal Language Models for Sustainability Disclosure Extraction: A Comparative Study</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Tanay</namePart>
      <namePart type="family">Gupta</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Tushar</namePart>
      <namePart type="family">Goel</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ishan</namePart>
      <namePart type="family">Verma</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>The Sixth Workshop on Insights from Negative Results in NLP</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Aleksandr</namePart>
        <namePart type="family">Drozd</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">João</namePart>
        <namePart type="family">Sedoc</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Shabnam</namePart>
        <namePart type="family">Tafreshi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Arjun</namePart>
        <namePart type="family">Akula</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Raphael</namePart>
        <namePart type="family">Shu</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Albuquerque, New Mexico</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-240-4</identifier>
    </relatedItem>
    <abstract>Sustainability metrics have increasingly become a crucial non-financial criterion in investment decision-making. Organizations worldwide are recognizing the importance of sustainability and are proactively highlighting their efforts through specialized sustainability reports. Unlike traditional annual reports, these sustainability disclosures are typically text-heavy and are often expressed as infographics, complex tables, and charts. The non-machine-readable nature of these reports presents a significant challenge for efficient information extraction. The rapid advancement of Vision Language Models (VLMs) has raised the question whether these VLMs can address such challenges in domain specific task. In this study, we demonstrate the application of VLMs for extracting sustainability information from dedicated sustainability reports. Our experiments highlight the limitations in the performance of several open-source VLMs in extracting information about sustainability disclosures from different type of pages.</abstract>
    <identifier type="citekey">gupta-etal-2025-exploring</identifier>
    <identifier type="doi">10.18653/v1/2025.insights-1.13</identifier>
    <location>
      <url>https://aclanthology.org/2025.insights-1.13/</url>
    </location>
    <part>
      <date>2025-05</date>
      <extent unit="page">
        <start>141</start>
        <end>149</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Exploring Multimodal Language Models for Sustainability Disclosure Extraction: A Comparative Study
%A Gupta, Tanay
%A Goel, Tushar
%A Verma, Ishan
%Y Drozd, Aleksandr
%Y Sedoc, João
%Y Tafreshi, Shabnam
%Y Akula, Arjun
%Y Shu, Raphael
%S The Sixth Workshop on Insights from Negative Results in NLP
%D 2025
%8 May
%I Association for Computational Linguistics
%C Albuquerque, New Mexico
%@ 979-8-89176-240-4
%F gupta-etal-2025-exploring
%X Sustainability metrics have increasingly become a crucial non-financial criterion in investment decision-making. Organizations worldwide are recognizing the importance of sustainability and are proactively highlighting their efforts through specialized sustainability reports. Unlike traditional annual reports, these sustainability disclosures are typically text-heavy and are often expressed as infographics, complex tables, and charts. The non-machine-readable nature of these reports presents a significant challenge for efficient information extraction. The rapid advancement of Vision Language Models (VLMs) has raised the question whether these VLMs can address such challenges in domain specific task. In this study, we demonstrate the application of VLMs for extracting sustainability information from dedicated sustainability reports. Our experiments highlight the limitations in the performance of several open-source VLMs in extracting information about sustainability disclosures from different type of pages.
%R 10.18653/v1/2025.insights-1.13
%U https://aclanthology.org/2025.insights-1.13/
%U https://doi.org/10.18653/v1/2025.insights-1.13
%P 141-149
Markdown (Informal)
[Exploring Multimodal Language Models for Sustainability Disclosure Extraction: A Comparative Study](https://aclanthology.org/2025.insights-1.13/) (Gupta et al., insights 2025)
ACL