% Workshop paper from the ClimateNLP 2024 proceedings (ACL Anthology record).
% Names use the unambiguous "Last, First" form; a middle initial belongs with
% the given name so that sorting and abbreviation operate on the real surname.
@inproceedings{gandhi-etal-2024-challenges,
  title     = {Challenges in End-to-End Policy Extraction from Climate Action Plans},
  author    = {Gandhi, Nupoor and
               Corringham, Tom and
               Strubell, Emma},
  editor    = {Stammbach, Dominik and
               Ni, Jingwei and
               Schimanski, Tobias and
               Dutia, Kalyan and
               Singh, Alok and
               Bingler, Julia and
               Christiaen, Christophe and
               Kushwaha, Neetu and
               Muccione, Veruska and
               Vaghefi, Saeid A. and
               Leippold, Markus},
  booktitle = {Proceedings of the 1st Workshop on Natural Language Processing Meets Climate Change ({ClimateNLP} 2024)},
  month     = aug,
  year      = {2024},
  address   = {Bangkok, Thailand},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.climatenlp-1.12},
  doi       = {10.18653/v1/2024.climatenlp-1.12},
  pages     = {156--167},
  abstract  = {Gray policy literature such as climate action plans (CAPs) provide an information-rich resource with potential to inform analysis and decision-making. However, these corpora are currently underutilized due to the substantial manual effort and expertise required to sift through long and detailed documents. Automatically structuring relevant information using information extraction (IE) would be useful for assisting policy scientists in synthesizing vast gray policy corpora to identify relevant entities, concepts and themes. LLMs have demonstrated strong performance on IE tasks in the few-shot setting, but it is unclear whether these gains transfer to gray policy literature which differs significantly to traditional benchmark datasets in several aspects, such as format of information content, length of documents, and inconsistency of document structure. We perform a case study on end-to-end IE with California CAPs, inspecting the performance of state-of-the-art tools for: (1) extracting content from CAPs into structured markup segments; (2) few-shot IE with LLMs; and (3) the utility of extracted entities for downstream analyses. We identify challenges at several points of the end-to-end IE pipeline for CAPs, and we provide recommendations for open problems centered around representing rich non-textual elements, document structure, flexible annotation schemes, and global information. Tackling these challenges would make it possible to realize the potential of LLMs for IE with gray policy literature.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="gandhi-etal-2024-challenges">
<titleInfo>
<title>Challenges in End-to-End Policy Extraction from Climate Action Plans</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nupoor</namePart>
<namePart type="family">Gandhi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tom</namePart>
<namePart type="family">Corringham</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Emma</namePart>
<namePart type="family">Strubell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st Workshop on Natural Language Processing Meets Climate Change (ClimateNLP 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dominik</namePart>
<namePart type="family">Stammbach</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jingwei</namePart>
<namePart type="family">Ni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tobias</namePart>
<namePart type="family">Schimanski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kalyan</namePart>
<namePart type="family">Dutia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alok</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Julia</namePart>
<namePart type="family">Bingler</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christophe</namePart>
<namePart type="family">Christiaen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Neetu</namePart>
<namePart type="family">Kushwaha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veruska</namePart>
<namePart type="family">Muccione</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Saeid A.</namePart>
<namePart type="family">Vaghefi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Markus</namePart>
<namePart type="family">Leippold</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Gray policy literature such as climate action plans (CAPs) provide an information-rich resource with potential to inform analysis and decision-making. However, these corpora are currently underutilized due to the substantial manual effort and expertise required to sift through long and detailed documents. Automatically structuring relevant information using information extraction (IE) would be useful for assisting policy scientists in synthesizing vast gray policy corpora to identify relevant entities, concepts and themes. LLMs have demonstrated strong performance on IE tasks in the few-shot setting, but it is unclear whether these gains transfer to gray policy literature which differs significantly to traditional benchmark datasets in several aspects, such as format of information content, length of documents, and inconsistency of document structure. We perform a case study on end-to-end IE with California CAPs, inspecting the performance of state-of-the-art tools for: (1) extracting content from CAPs into structured markup segments; (2) few-shot IE with LLMs; and (3) the utility of extracted entities for downstream analyses. We identify challenges at several points of the end-to-end IE pipeline for CAPs, and we provide recommendations for open problems centered around representing rich non-textual elements, document structure, flexible annotation schemes, and global information. Tackling these challenges would make it possible to realize the potential of LLMs for IE with gray policy literature.</abstract>
<identifier type="citekey">gandhi-etal-2024-challenges</identifier>
<identifier type="doi">10.18653/v1/2024.climatenlp-1.12</identifier>
<location>
<url>https://aclanthology.org/2024.climatenlp-1.12</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>156</start>
<end>167</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Challenges in End-to-End Policy Extraction from Climate Action Plans
%A Gandhi, Nupoor
%A Corringham, Tom
%A Strubell, Emma
%Y Stammbach, Dominik
%Y Ni, Jingwei
%Y Schimanski, Tobias
%Y Dutia, Kalyan
%Y Singh, Alok
%Y Bingler, Julia
%Y Christiaen, Christophe
%Y Kushwaha, Neetu
%Y Muccione, Veruska
%Y Vaghefi, Saeid A.
%Y Leippold, Markus
%S Proceedings of the 1st Workshop on Natural Language Processing Meets Climate Change (ClimateNLP 2024)
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F gandhi-etal-2024-challenges
%X Gray policy literature such as climate action plans (CAPs) provide an information-rich resource with potential to inform analysis and decision-making. However, these corpora are currently underutilized due to the substantial manual effort and expertise required to sift through long and detailed documents. Automatically structuring relevant information using information extraction (IE) would be useful for assisting policy scientists in synthesizing vast gray policy corpora to identify relevant entities, concepts and themes. LLMs have demonstrated strong performance on IE tasks in the few-shot setting, but it is unclear whether these gains transfer to gray policy literature which differs significantly to traditional benchmark datasets in several aspects, such as format of information content, length of documents, and inconsistency of document structure. We perform a case study on end-to-end IE with California CAPs, inspecting the performance of state-of-the-art tools for: (1) extracting content from CAPs into structured markup segments; (2) few-shot IE with LLMs; and (3) the utility of extracted entities for downstream analyses. We identify challenges at several points of the end-to-end IE pipeline for CAPs, and we provide recommendations for open problems centered around representing rich non-textual elements, document structure, flexible annotation schemes, and global information. Tackling these challenges would make it possible to realize the potential of LLMs for IE with gray policy literature.
%R 10.18653/v1/2024.climatenlp-1.12
%U https://aclanthology.org/2024.climatenlp-1.12
%U https://doi.org/10.18653/v1/2024.climatenlp-1.12
%P 156-167
Markdown (Informal)
[Challenges in End-to-End Policy Extraction from Climate Action Plans](https://aclanthology.org/2024.climatenlp-1.12) (Gandhi et al., ClimateNLP-WS 2024)
ACL