@inproceedings{dimmelmeier-etal-2024-informing,
title = "Informing climate risk analysis using textual information - A research agenda",
author = "Dimmelmeier, Andreas and
Doll, Hendrik and
Schierholz, Malte and
Kormanyos, Emily and
Fehr, Maurice and
Ma, Bolei and
Beck, Jacob and
Fraser, Alexander and
Kreuter, Frauke",
editor = "Stammbach, Dominik and
Ni, Jingwei and
Schimanski, Tobias and
Dutia, Kalyan and
Singh, Alok and
Bingler, Julia and
Christiaen, Christophe and
Kushwaha, Neetu and
Muccione, Veruska and
A. Vaghefi, Saeid and
Leippold, Markus",
booktitle = "Proceedings of the 1st Workshop on Natural Language Processing Meets Climate Change (ClimateNLP 2024)",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.climatenlp-1.2/",
doi = "10.18653/v1/2024.climatenlp-1.2",
pages = "12--26",
abstract = "We present a research agenda focused on efficiently extracting, assuring quality, and consolidating textual company sustainability information to address urgent climate change decision-making needs. Starting from the goal to create integrated FAIR (Findable, Accessible, Interoperable, Reusable) climate-related data, we identify research needs pertaining to the technical aspects of information extraction as well as to the design of the integrated sustainability datasets that we seek to compile. Regarding extraction, we leverage technological advancements, particularly in large language models (LLMs) and Retrieval-Augmented Generation (RAG) pipelines, to unlock the underutilized potential of unstructured textual information contained in corporate sustainability reports. In applying these techniques, we review key challenges, which include the retrieval and extraction of CO$_2$ emission values from PDF documents, especially from unstructured tables and graphs therein, and the validation of automatically extracted data through comparisons with human-annotated values. We also review how existing use cases and practices in climate risk analytics relate to choices of what textual information should be extracted and how it could be linked to existing structured data."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="dimmelmeier-etal-2024-informing">
<titleInfo>
<title>Informing climate risk analysis using textual information - A research agenda</title>
</titleInfo>
<name type="personal">
<namePart type="given">Andreas</namePart>
<namePart type="family">Dimmelmeier</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hendrik</namePart>
<namePart type="family">Doll</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Malte</namePart>
<namePart type="family">Schierholz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Emily</namePart>
<namePart type="family">Kormanyos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maurice</namePart>
<namePart type="family">Fehr</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bolei</namePart>
<namePart type="family">Ma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jacob</namePart>
<namePart type="family">Beck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexander</namePart>
<namePart type="family">Fraser</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Frauke</namePart>
<namePart type="family">Kreuter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st Workshop on Natural Language Processing Meets Climate Change (ClimateNLP 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dominik</namePart>
<namePart type="family">Stammbach</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jingwei</namePart>
<namePart type="family">Ni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tobias</namePart>
<namePart type="family">Schimanski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kalyan</namePart>
<namePart type="family">Dutia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alok</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Julia</namePart>
<namePart type="family">Bingler</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christophe</namePart>
<namePart type="family">Christiaen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Neetu</namePart>
<namePart type="family">Kushwaha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veruska</namePart>
<namePart type="family">Muccione</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Saeid</namePart>
<namePart type="family">A. Vaghefi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Markus</namePart>
<namePart type="family">Leippold</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present a research agenda focused on efficiently extracting, assuring quality, and consolidating textual company sustainability information to address urgent climate change decision-making needs. Starting from the goal to create integrated FAIR (Findable, Accessible, Interoperable, Reusable) climate-related data, we identify research needs pertaining to the technical aspects of information extraction as well as to the design of the integrated sustainability datasets that we seek to compile. Regarding extraction, we leverage technological advancements, particularly in large language models (LLMs) and Retrieval-Augmented Generation (RAG) pipelines, to unlock the underutilized potential of unstructured textual information contained in corporate sustainability reports. In applying these techniques, we review key challenges, which include the retrieval and extraction of CO₂ emission values from PDF documents, especially from unstructured tables and graphs therein, and the validation of automatically extracted data through comparisons with human-annotated values. We also review how existing use cases and practices in climate risk analytics relate to choices of what textual information should be extracted and how it could be linked to existing structured data.</abstract>
<identifier type="citekey">dimmelmeier-etal-2024-informing</identifier>
<identifier type="doi">10.18653/v1/2024.climatenlp-1.2</identifier>
<location>
<url>https://aclanthology.org/2024.climatenlp-1.2/</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>12</start>
<end>26</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Informing climate risk analysis using textual information - A research agenda
%A Dimmelmeier, Andreas
%A Doll, Hendrik
%A Schierholz, Malte
%A Kormanyos, Emily
%A Fehr, Maurice
%A Ma, Bolei
%A Beck, Jacob
%A Fraser, Alexander
%A Kreuter, Frauke
%Y Stammbach, Dominik
%Y Ni, Jingwei
%Y Schimanski, Tobias
%Y Dutia, Kalyan
%Y Singh, Alok
%Y Bingler, Julia
%Y Christiaen, Christophe
%Y Kushwaha, Neetu
%Y Muccione, Veruska
%Y A. Vaghefi, Saeid
%Y Leippold, Markus
%S Proceedings of the 1st Workshop on Natural Language Processing Meets Climate Change (ClimateNLP 2024)
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F dimmelmeier-etal-2024-informing
%X We present a research agenda focused on efficiently extracting, assuring quality, and consolidating textual company sustainability information to address urgent climate change decision-making needs. Starting from the goal to create integrated FAIR (Findable, Accessible, Interoperable, Reusable) climate-related data, we identify research needs pertaining to the technical aspects of information extraction as well as to the design of the integrated sustainability datasets that we seek to compile. Regarding extraction, we leverage technological advancements, particularly in large language models (LLMs) and Retrieval-Augmented Generation (RAG) pipelines, to unlock the underutilized potential of unstructured textual information contained in corporate sustainability reports. In applying these techniques, we review key challenges, which include the retrieval and extraction of CO₂ emission values from PDF documents, especially from unstructured tables and graphs therein, and the validation of automatically extracted data through comparisons with human-annotated values. We also review how existing use cases and practices in climate risk analytics relate to choices of what textual information should be extracted and how it could be linked to existing structured data.
%R 10.18653/v1/2024.climatenlp-1.2
%U https://aclanthology.org/2024.climatenlp-1.2/
%U https://doi.org/10.18653/v1/2024.climatenlp-1.2
%P 12-26
Markdown (Informal)
[Informing climate risk analysis using textual information - A research agenda](https://aclanthology.org/2024.climatenlp-1.2/) (Dimmelmeier et al., ClimateNLP 2024)
ACL
- Andreas Dimmelmeier, Hendrik Doll, Malte Schierholz, Emily Kormanyos, Maurice Fehr, Bolei Ma, Jacob Beck, Alexander Fraser, and Frauke Kreuter. 2024. Informing climate risk analysis using textual information - A research agenda. In Proceedings of the 1st Workshop on Natural Language Processing Meets Climate Change (ClimateNLP 2024), pages 12–26, Bangkok, Thailand. Association for Computational Linguistics.