@inproceedings{liang-etal-2024-learning,
title = "Learning to Trust Your Feelings: Leveraging Self-awareness in {LLM}s for Hallucination Mitigation",
author = "Liang, Yuxin and
Song, Zhuoyang and
Wang, Hao and
Zhang, Jiaxing",
editor = "Yu, Wenhao and
Shi, Weijia and
Yasunaga, Michihiro and
Jiang, Meng and
Zhu, Chenguang and
Hajishirzi, Hannaneh and
Zettlemoyer, Luke and
Zhang, Zhihan",
booktitle = "Proceedings of the 3rd Workshop on Knowledge Augmented Methods for NLP",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.knowledgenlp-1.4",
doi = "10.18653/v1/2024.knowledgenlp-1.4",
pages = "44--58",
abstract = "We evaluate the ability of Large Language Models (LLMs) to discern and express their internal knowledge state, a key factor in countering factual hallucination and ensuring reliable application of LLMs. We observe a robust self-awareness of internal knowledge state in LLMs, evidenced by over 85{\%} accuracy in knowledge state probing. However, LLMs often fail to faithfully express their internal knowledge during generation, leading to factual hallucinations. We develop an automated hallucination annotation tool, DreamCatcher, which merges knowledge probing and consistency checking methods to rank factual preference data. Using knowledge preference as reward, We propose a Reinforcement Learning from Knowledge Feedback (RLKF) training framework, leveraging reinforcement learning to enhance the factuality and honesty of LLMs. Our experiments across multiple models show that RLKF training effectively enhances the ability of models to utilize their internal knowledge state, boosting performance in a variety of knowledge-based and honesty-related tasks.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="liang-etal-2024-learning">
<titleInfo>
<title>Learning to Trust Your Feelings: Leveraging Self-awareness in LLMs for Hallucination Mitigation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yuxin</namePart>
<namePart type="family">Liang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhuoyang</namePart>
<namePart type="family">Song</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hao</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiaxing</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 3rd Workshop on Knowledge Augmented Methods for NLP</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wenhao</namePart>
<namePart type="family">Yu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Weijia</namePart>
<namePart type="family">Shi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michihiro</namePart>
<namePart type="family">Yasunaga</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Meng</namePart>
<namePart type="family">Jiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chenguang</namePart>
<namePart type="family">Zhu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hannaneh</namePart>
<namePart type="family">Hajishirzi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luke</namePart>
<namePart type="family">Zettlemoyer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhihan</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We evaluate the ability of Large Language Models (LLMs) to discern and express their internal knowledge state, a key factor in countering factual hallucination and ensuring reliable application of LLMs. We observe a robust self-awareness of internal knowledge state in LLMs, evidenced by over 85% accuracy in knowledge state probing. However, LLMs often fail to faithfully express their internal knowledge during generation, leading to factual hallucinations. We develop an automated hallucination annotation tool, DreamCatcher, which merges knowledge probing and consistency checking methods to rank factual preference data. Using knowledge preference as reward, we propose a Reinforcement Learning from Knowledge Feedback (RLKF) training framework, leveraging reinforcement learning to enhance the factuality and honesty of LLMs. Our experiments across multiple models show that RLKF training effectively enhances the ability of models to utilize their internal knowledge state, boosting performance in a variety of knowledge-based and honesty-related tasks.</abstract>
<identifier type="citekey">liang-etal-2024-learning</identifier>
<identifier type="doi">10.18653/v1/2024.knowledgenlp-1.4</identifier>
<location>
<url>https://aclanthology.org/2024.knowledgenlp-1.4</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>44</start>
<end>58</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Learning to Trust Your Feelings: Leveraging Self-awareness in LLMs for Hallucination Mitigation
%A Liang, Yuxin
%A Song, Zhuoyang
%A Wang, Hao
%A Zhang, Jiaxing
%Y Yu, Wenhao
%Y Shi, Weijia
%Y Yasunaga, Michihiro
%Y Jiang, Meng
%Y Zhu, Chenguang
%Y Hajishirzi, Hannaneh
%Y Zettlemoyer, Luke
%Y Zhang, Zhihan
%S Proceedings of the 3rd Workshop on Knowledge Augmented Methods for NLP
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F liang-etal-2024-learning
%X We evaluate the ability of Large Language Models (LLMs) to discern and express their internal knowledge state, a key factor in countering factual hallucination and ensuring reliable application of LLMs. We observe a robust self-awareness of internal knowledge state in LLMs, evidenced by over 85% accuracy in knowledge state probing. However, LLMs often fail to faithfully express their internal knowledge during generation, leading to factual hallucinations. We develop an automated hallucination annotation tool, DreamCatcher, which merges knowledge probing and consistency checking methods to rank factual preference data. Using knowledge preference as reward, we propose a Reinforcement Learning from Knowledge Feedback (RLKF) training framework, leveraging reinforcement learning to enhance the factuality and honesty of LLMs. Our experiments across multiple models show that RLKF training effectively enhances the ability of models to utilize their internal knowledge state, boosting performance in a variety of knowledge-based and honesty-related tasks.
%R 10.18653/v1/2024.knowledgenlp-1.4
%U https://aclanthology.org/2024.knowledgenlp-1.4
%U https://doi.org/10.18653/v1/2024.knowledgenlp-1.4
%P 44-58
Markdown (Informal)
[Learning to Trust Your Feelings: Leveraging Self-awareness in LLMs for Hallucination Mitigation](https://aclanthology.org/2024.knowledgenlp-1.4) (Liang et al., KnowledgeNLP-WS 2024)
ACL
Yuxin Liang, Zhuoyang Song, Hao Wang, and Jiaxing Zhang. 2024. Learning to Trust Your Feelings: Leveraging Self-awareness in LLMs for Hallucination Mitigation. In Proceedings of the 3rd Workshop on Knowledge Augmented Methods for NLP, pages 44–58, Bangkok, Thailand. Association for Computational Linguistics.