@inproceedings{hsu-etal-2024-evaluating,
title = "Evaluating {C}hat{N}et{Z}ero, an {LLM}-Chatbot to Demystify Climate Pledges",
author = "Hsu, Angel and
Laney, Mason and
Zhang, Ji and
Manya, Diego and
Farczadi, Linda",
editor = "Stammbach, Dominik and
Ni, Jingwei and
Schimanski, Tobias and
Dutia, Kalyan and
Singh, Alok and
Bingler, Julia and
Christiaen, Christophe and
Kushwaha, Neetu and
Muccione, Veruska and
A. Vaghefi, Saeid and
Leippold, Markus",
booktitle = "Proceedings of the 1st Workshop on Natural Language Processing Meets Climate Change (ClimateNLP 2024)",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.climatenlp-1.6",
doi = "10.18653/v1/2024.climatenlp-1.6",
pages = "82--92",
abstract = "This paper introduces and evaluates ChatNetZero, a large-language model (LLM) chatbot developed through Retrieval-Augmented Generation (RAG), which uses generative AI to produce answers grounded in verified, climate-domain specific information. We describe ChatNetZero{'}s design, particularly the innovation of anti-hallucination and reference modules designed to enhance the accuracy and credibility of generated responses. To evaluate ChatNetZero{'}s performance against other LLMs, including GPT-4, Gemini, Coral, and ChatClimate, we conduct two types of validation: comparing LLMs{'} generated responses to original source documents to verify their factual accuracy, and employing an expert survey to evaluate the overall quality, accuracy and relevance of each response. We find that while ChatNetZero responses show higher factual accuracy when compared to original source data, experts surveyed prefer lengthier responses that provide more context. Our results highlight the importance of prioritizing information presentation in the design of domain-specific LLMs to ensure that scientific information is effectively communicated, especially as even expert audiences find it challenging to assess the credibility of AI-generated content.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hsu-etal-2024-evaluating">
<titleInfo>
<title>Evaluating ChatNetZero, an LLM-Chatbot to Demystify Climate Pledges</title>
</titleInfo>
<name type="personal">
<namePart type="given">Angel</namePart>
<namePart type="family">Hsu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mason</namePart>
<namePart type="family">Laney</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ji</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Diego</namePart>
<namePart type="family">Manya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Linda</namePart>
<namePart type="family">Farczadi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st Workshop on Natural Language Processing Meets Climate Change (ClimateNLP 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dominik</namePart>
<namePart type="family">Stammbach</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jingwei</namePart>
<namePart type="family">Ni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tobias</namePart>
<namePart type="family">Schimanski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kalyan</namePart>
<namePart type="family">Dutia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alok</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Julia</namePart>
<namePart type="family">Bingler</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christophe</namePart>
<namePart type="family">Christiaen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Neetu</namePart>
<namePart type="family">Kushwaha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veruska</namePart>
<namePart type="family">Muccione</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Saeid</namePart>
<namePart type="family">A. Vaghefi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Markus</namePart>
<namePart type="family">Leippold</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper introduces and evaluates ChatNetZero, a large-language model (LLM) chatbot developed through Retrieval-Augmented Generation (RAG), which uses generative AI to produce answers grounded in verified, climate-domain specific information. We describe ChatNetZero’s design, particularly the innovation of anti-hallucination and reference modules designed to enhance the accuracy and credibility of generated responses. To evaluate ChatNetZero’s performance against other LLMs, including GPT-4, Gemini, Coral, and ChatClimate, we conduct two types of validation: comparing LLMs’ generated responses to original source documents to verify their factual accuracy, and employing an expert survey to evaluate the overall quality, accuracy and relevance of each response. We find that while ChatNetZero responses show higher factual accuracy when compared to original source data, experts surveyed prefer lengthier responses that provide more context. Our results highlight the importance of prioritizing information presentation in the design of domain-specific LLMs to ensure that scientific information is effectively communicated, especially as even expert audiences find it challenging to assess the credibility of AI-generated content.</abstract>
<identifier type="citekey">hsu-etal-2024-evaluating</identifier>
<identifier type="doi">10.18653/v1/2024.climatenlp-1.6</identifier>
<location>
<url>https://aclanthology.org/2024.climatenlp-1.6</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>82</start>
<end>92</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Evaluating ChatNetZero, an LLM-Chatbot to Demystify Climate Pledges
%A Hsu, Angel
%A Laney, Mason
%A Zhang, Ji
%A Manya, Diego
%A Farczadi, Linda
%Y Stammbach, Dominik
%Y Ni, Jingwei
%Y Schimanski, Tobias
%Y Dutia, Kalyan
%Y Singh, Alok
%Y Bingler, Julia
%Y Christiaen, Christophe
%Y Kushwaha, Neetu
%Y Muccione, Veruska
%Y A. Vaghefi, Saeid
%Y Leippold, Markus
%S Proceedings of the 1st Workshop on Natural Language Processing Meets Climate Change (ClimateNLP 2024)
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F hsu-etal-2024-evaluating
%X This paper introduces and evaluates ChatNetZero, a large-language model (LLM) chatbot developed through Retrieval-Augmented Generation (RAG), which uses generative AI to produce answers grounded in verified, climate-domain specific information. We describe ChatNetZero’s design, particularly the innovation of anti-hallucination and reference modules designed to enhance the accuracy and credibility of generated responses. To evaluate ChatNetZero’s performance against other LLMs, including GPT-4, Gemini, Coral, and ChatClimate, we conduct two types of validation: comparing LLMs’ generated responses to original source documents to verify their factual accuracy, and employing an expert survey to evaluate the overall quality, accuracy and relevance of each response. We find that while ChatNetZero responses show higher factual accuracy when compared to original source data, experts surveyed prefer lengthier responses that provide more context. Our results highlight the importance of prioritizing information presentation in the design of domain-specific LLMs to ensure that scientific information is effectively communicated, especially as even expert audiences find it challenging to assess the credibility of AI-generated content.
%R 10.18653/v1/2024.climatenlp-1.6
%U https://aclanthology.org/2024.climatenlp-1.6
%U https://doi.org/10.18653/v1/2024.climatenlp-1.6
%P 82-92
Markdown (Informal)
[Evaluating ChatNetZero, an LLM-Chatbot to Demystify Climate Pledges](https://aclanthology.org/2024.climatenlp-1.6) (Hsu et al., ClimateNLP-WS 2024)
ACL