@inproceedings{borah-etal-2025-clime,
  title     = {{CliME}: Evaluating Multimodal Climate Discourse on Social Media and the Climate Alignment Quotient ({CAQ})},
  author    = {Borah, Abhilekh and
               Abdullah, Hasnat Md and
               Wei, Kangda and
               Huang, Ruihong},
  editor    = {Atwell, Katherine and
               Biester, Laura and
               Borah, Angana and
               Dementieva, Daryna and
               Ignat, Oana and
               Kotonya, Neema and
               Liu, Ziyi and
               Wan, Ruyuan and
               Wilson, Steven and
               Zhao, Jieyu},
  booktitle = {Proceedings of the Fourth Workshop on NLP for Positive Impact (NLP4PI)},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.nlp4pi-1.4/},
  doi       = {10.18653/v1/2025.nlp4pi-1.4},
  pages     = {43--61},
  isbn      = {978-1-959429-19-7},
  abstract  = {The rise of Large Language Models (LLMs) has raised questions about their ability to understand climate-related contexts. Though climate change dominates social media, analyzing its multimodal expressions is understudied, and current tools have failed to determine whether LLMs amplify credible solutions or spread unsubstantiated claims. To address this, we introduce CliME (Climate Change Multimodal Evaluation), a first-of-its-kind multimodal dataset, comprising 2579 Twitter and Reddit posts. The benchmark features a diverse collection of humorous memes and skeptical posts, capturing how these formats distill complex issues into viral narratives that shape public opinion and policy discussions. To systematically evaluate LLM performance, we present the Climate Alignment Quotient (CAQ), a novel metric comprising five distinct dimensions: Articulation, Evidence, Resonance, Transition, and Specificity. Additionally, we propose three analytical lenses: Actionability, Criticality, and Justice, to guide the assessment of LLM-generated climate discourse using CAQ. Our findings, based on the CAQ metric, indicate that while most evaluated LLMs perform relatively well in Criticality and Justice, they consistently underperform on the Actionability axis. Among the models evaluated, Claude 3.7 Sonnet achieves the highest overall performance. We publicly release our code and dataset to foster further research in this domain.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="borah-etal-2025-clime">
<titleInfo>
<title>CliME: Evaluating Multimodal Climate Discourse on Social Media and the Climate Alignment Quotient (CAQ)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Abhilekh</namePart>
<namePart type="family">Borah</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hasnat</namePart>
<namePart type="given">Md</namePart>
<namePart type="family">Abdullah</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kangda</namePart>
<namePart type="family">Wei</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruihong</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fourth Workshop on NLP for Positive Impact (NLP4PI)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Katherine</namePart>
<namePart type="family">Atwell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Laura</namePart>
<namePart type="family">Biester</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Angana</namePart>
<namePart type="family">Borah</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daryna</namePart>
<namePart type="family">Dementieva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Oana</namePart>
<namePart type="family">Ignat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Neema</namePart>
<namePart type="family">Kotonya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ziyi</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruyuan</namePart>
<namePart type="family">Wan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Wilson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jieyu</namePart>
<namePart type="family">Zhao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">978-1-959429-19-7</identifier>
</relatedItem>
<abstract>The rise of Large Language Models (LLMs) has raised questions about their ability to understand climate-related contexts. Though climate change dominates social media, analyzing its multimodal expressions is understudied, and current tools have failed to determine whether LLMs amplify credible solutions or spread unsubstantiated claims. To address this, we introduce CliME (Climate Change Multimodal Evaluation), a first-of-its-kind multimodal dataset, comprising 2579 Twitter and Reddit posts. The benchmark features a diverse collection of humorous memes and skeptical posts, capturing how these formats distill complex issues into viral narratives that shape public opinion and policy discussions. To systematically evaluate LLM performance, we present the Climate Alignment Quotient (CAQ), a novel metric comprising five distinct dimensions: Articulation, Evidence, Resonance, Transition, and Specificity. Additionally, we propose three analytical lenses: Actionability, Criticality, and Justice, to guide the assessment of LLM-generated climate discourse using CAQ. Our findings, based on the CAQ metric, indicate that while most evaluated LLMs perform relatively well in Criticality and Justice, they consistently underperform on the Actionability axis. Among the models evaluated, Claude 3.7 Sonnet achieves the highest overall performance. We publicly release our code and dataset to foster further research in this domain.</abstract>
<identifier type="citekey">borah-etal-2025-clime</identifier>
<identifier type="doi">10.18653/v1/2025.nlp4pi-1.4</identifier>
<location>
<url>https://aclanthology.org/2025.nlp4pi-1.4/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>43</start>
<end>61</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T CliME: Evaluating Multimodal Climate Discourse on Social Media and the Climate Alignment Quotient (CAQ)
%A Borah, Abhilekh
%A Abdullah, Hasnat Md
%A Wei, Kangda
%A Huang, Ruihong
%Y Atwell, Katherine
%Y Biester, Laura
%Y Borah, Angana
%Y Dementieva, Daryna
%Y Ignat, Oana
%Y Kotonya, Neema
%Y Liu, Ziyi
%Y Wan, Ruyuan
%Y Wilson, Steven
%Y Zhao, Jieyu
%S Proceedings of the Fourth Workshop on NLP for Positive Impact (NLP4PI)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 978-1-959429-19-7
%F borah-etal-2025-clime
%X The rise of Large Language Models (LLMs) has raised questions about their ability to understand climate-related contexts. Though climate change dominates social media, analyzing its multimodal expressions is understudied, and current tools have failed to determine whether LLMs amplify credible solutions or spread unsubstantiated claims. To address this, we introduce CliME (Climate Change Multimodal Evaluation), a first-of-its-kind multimodal dataset, comprising 2579 Twitter and Reddit posts. The benchmark features a diverse collection of humorous memes and skeptical posts, capturing how these formats distill complex issues into viral narratives that shape public opinion and policy discussions. To systematically evaluate LLM performance, we present the Climate Alignment Quotient (CAQ), a novel metric comprising five distinct dimensions: Articulation, Evidence, Resonance, Transition, and Specificity. Additionally, we propose three analytical lenses: Actionability, Criticality, and Justice, to guide the assessment of LLM-generated climate discourse using CAQ. Our findings, based on the CAQ metric, indicate that while most evaluated LLMs perform relatively well in Criticality and Justice, they consistently underperform on the Actionability axis. Among the models evaluated, Claude 3.7 Sonnet achieves the highest overall performance. We publicly release our code and dataset to foster further research in this domain.
%R 10.18653/v1/2025.nlp4pi-1.4
%U https://aclanthology.org/2025.nlp4pi-1.4/
%U https://doi.org/10.18653/v1/2025.nlp4pi-1.4
%P 43-61
Markdown (Informal)
[CliME: Evaluating Multimodal Climate Discourse on Social Media and the Climate Alignment Quotient (CAQ)](https://aclanthology.org/2025.nlp4pi-1.4/) (Borah et al., NLP4PI 2025)
ACL