@inproceedings{kurfali-etal-2025-climateeval,
title = "{C}limate{E}val: A Comprehensive Benchmark for {NLP} Tasks Related to Climate Change",
author = "Kurfali, Murathan and
Zahra, Shorouq and
Nivre, Joakim and
Messori, Gabriele",
editor = "Dutia, Kalyan and
Henderson, Peter and
Leippold, Markus and
Manning, Christopher and
Morio, Gaku and
Muccione, Veruska and
Ni, Jingwei and
Schimanski, Tobias and
Stammbach, Dominik and
Singh, Alok and
Su, Alba (Ruiran) and
A. Vaghefi, Saeid",
booktitle = "Proceedings of the 2nd Workshop on Natural Language Processing Meets Climate Change (ClimateNLP 2025)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.climatenlp-1.13/",
doi = "10.18653/v1/2025.climatenlp-1.13",
pages = "194--207",
ISBN = "979-8-89176-259-6",
abstract = "ClimateEval is a comprehensive benchmark designed to evaluate natural language processing models across a broad range of tasks related to climate change. ClimateEval aggregates existing datasets along with a newly developed news classification dataset, created specifically for this release. This results in a benchmark of 25 tasks based on 13 datasets, covering key aspects of climate discourse, including text classification, question answering, and information extraction. Our benchmark provides a standardized evaluation suite for systematically assessing the performance of large language models (LLMs) on these tasks. Additionally, we conduct an extensive evaluation of open-source LLMs (ranging from 2B to 70B parameters) in both zero-shot and few-shot settings, analyzing their strengths and limitations in the domain of climate change."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kurfali-etal-2025-climateeval">
<titleInfo>
<title>ClimateEval: A Comprehensive Benchmark for NLP Tasks Related to Climate Change</title>
</titleInfo>
<name type="personal">
<namePart type="given">Murathan</namePart>
<namePart type="family">Kurfali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shorouq</namePart>
<namePart type="family">Zahra</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joakim</namePart>
<namePart type="family">Nivre</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gabriele</namePart>
<namePart type="family">Messori</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2nd Workshop on Natural Language Processing Meets Climate Change (ClimateNLP 2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kalyan</namePart>
<namePart type="family">Dutia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Peter</namePart>
<namePart type="family">Henderson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Markus</namePart>
<namePart type="family">Leippold</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christopher</namePart>
<namePart type="family">Manning</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gaku</namePart>
<namePart type="family">Morio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veruska</namePart>
<namePart type="family">Muccione</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jingwei</namePart>
<namePart type="family">Ni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tobias</namePart>
<namePart type="family">Schimanski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dominik</namePart>
<namePart type="family">Stammbach</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alok</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alba</namePart>
<namePart type="given">(Ruiran)</namePart>
<namePart type="family">Su</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Saeid</namePart>
<namePart type="family">A. Vaghefi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-259-6</identifier>
</relatedItem>
<abstract>ClimateEval is a comprehensive benchmark designed to evaluate natural language processing models across a broad range of tasks related to climate change. ClimateEval aggregates existing datasets along with a newly developed news classification dataset, created specifically for this release. This results in a benchmark of 25 tasks based on 13 datasets, covering key aspects of climate discourse, including text classification, question answering, and information extraction. Our benchmark provides a standardized evaluation suite for systematically assessing the performance of large language models (LLMs) on these tasks. Additionally, we conduct an extensive evaluation of open-source LLMs (ranging from 2B to 70B parameters) in both zero-shot and few-shot settings, analyzing their strengths and limitations in the domain of climate change.</abstract>
<identifier type="citekey">kurfali-etal-2025-climateeval</identifier>
<identifier type="doi">10.18653/v1/2025.climatenlp-1.13</identifier>
<location>
<url>https://aclanthology.org/2025.climatenlp-1.13/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>194</start>
<end>207</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T ClimateEval: A Comprehensive Benchmark for NLP Tasks Related to Climate Change
%A Kurfali, Murathan
%A Zahra, Shorouq
%A Nivre, Joakim
%A Messori, Gabriele
%Y Dutia, Kalyan
%Y Henderson, Peter
%Y Leippold, Markus
%Y Manning, Christopher
%Y Morio, Gaku
%Y Muccione, Veruska
%Y Ni, Jingwei
%Y Schimanski, Tobias
%Y Stammbach, Dominik
%Y Singh, Alok
%Y Su, Alba (Ruiran)
%Y A. Vaghefi, Saeid
%S Proceedings of the 2nd Workshop on Natural Language Processing Meets Climate Change (ClimateNLP 2025)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-259-6
%F kurfali-etal-2025-climateeval
%X ClimateEval is a comprehensive benchmark designed to evaluate natural language processing models across a broad range of tasks related to climate change. ClimateEval aggregates existing datasets along with a newly developed news classification dataset, created specifically for this release. This results in a benchmark of 25 tasks based on 13 datasets, covering key aspects of climate discourse, including text classification, question answering, and information extraction. Our benchmark provides a standardized evaluation suite for systematically assessing the performance of large language models (LLMs) on these tasks. Additionally, we conduct an extensive evaluation of open-source LLMs (ranging from 2B to 70B parameters) in both zero-shot and few-shot settings, analyzing their strengths and limitations in the domain of climate change.
%R 10.18653/v1/2025.climatenlp-1.13
%U https://aclanthology.org/2025.climatenlp-1.13/
%U https://doi.org/10.18653/v1/2025.climatenlp-1.13
%P 194-207
Markdown (Informal)
[ClimateEval: A Comprehensive Benchmark for NLP Tasks Related to Climate Change](https://aclanthology.org/2025.climatenlp-1.13/) (Kurfali et al., ClimateNLP 2025)
ACL