@inproceedings{li-etal-2024-truthreader,
title = "{T}ruth{R}eader: Towards Trustworthy Document Assistant Chatbot with Reliable Attribution",
author = "Li, Dongfang and
Hu, Xinshuo and
Sun, Zetian and
Hu, Baotian and
Ye, Shaolin and
Shan, Zifei and
Chen, Qian and
Zhang, Min",
editor = "Hernandez Farias, Delia Irazu and
Hope, Tom and
Li, Manling",
booktitle = "Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing: System Demonstrations",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.emnlp-demo.10",
pages = "89--100",
abstract = "Document assistant chatbots are empowered with extensive capabilities by Large Language Models (LLMs) and have exhibited significant advancements. However, these systems may suffer from hallucinations that are difficult to verify in the context of given documents.Moreover, despite the emergence of products for document assistants, they either heavily rely on commercial LLM APIs or lack transparency in their technical implementations, leading to expensive usage costs and data privacy concerns. In this work, we introduce a fully open-source document assistant chatbot with reliable attribution, named TruthReader, utilizing adapted conversational retriever and LLMs. Our system enables the LLMs to generate answers with detailed inline citations, which can be attributed to the original document paragraphs, facilitating the verification of the factual consistency of the generated text. To further adapt the generative model, we develop a comprehensive pipeline consisting of data construction and model optimization processes.This pipeline equips the LLMs with the necessary capabilities to generate accurate answers, produce reliable citations, and refuse unanswerable questions. Our codebase, data and models are released, and the video demonstration of our system is available at https://youtu.be/RYVt3itzUQM.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="li-etal-2024-truthreader">
<titleInfo>
<title>TruthReader: Towards Trustworthy Document Assistant Chatbot with Reliable Attribution</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dongfang</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xinshuo</namePart>
<namePart type="family">Hu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zetian</namePart>
<namePart type="family">Sun</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Baotian</namePart>
<namePart type="family">Hu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shaolin</namePart>
<namePart type="family">Ye</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zifei</namePart>
<namePart type="family">Shan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Qian</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing: System Demonstrations</title>
</titleInfo>
<name type="personal">
<namePart type="given">Delia</namePart>
<namePart type="given">Irazu</namePart>
<namePart type="family">Hernandez Farias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tom</namePart>
<namePart type="family">Hope</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Manling</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Document assistant chatbots are empowered with extensive capabilities by Large Language Models (LLMs) and have exhibited significant advancements. However, these systems may suffer from hallucinations that are difficult to verify in the context of given documents.Moreover, despite the emergence of products for document assistants, they either heavily rely on commercial LLM APIs or lack transparency in their technical implementations, leading to expensive usage costs and data privacy concerns. In this work, we introduce a fully open-source document assistant chatbot with reliable attribution, named TruthReader, utilizing adapted conversational retriever and LLMs. Our system enables the LLMs to generate answers with detailed inline citations, which can be attributed to the original document paragraphs, facilitating the verification of the factual consistency of the generated text. To further adapt the generative model, we develop a comprehensive pipeline consisting of data construction and model optimization processes.This pipeline equips the LLMs with the necessary capabilities to generate accurate answers, produce reliable citations, and refuse unanswerable questions. Our codebase, data and models are released, and the video demonstration of our system is available at https://youtu.be/RYVt3itzUQM.</abstract>
<identifier type="citekey">li-etal-2024-truthreader</identifier>
<location>
<url>https://aclanthology.org/2024.emnlp-demo.10</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>89</start>
<end>100</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T TruthReader: Towards Trustworthy Document Assistant Chatbot with Reliable Attribution
%A Li, Dongfang
%A Hu, Xinshuo
%A Sun, Zetian
%A Hu, Baotian
%A Ye, Shaolin
%A Shan, Zifei
%A Chen, Qian
%A Zhang, Min
%Y Hernandez Farias, Delia Irazu
%Y Hope, Tom
%Y Li, Manling
%S Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing: System Demonstrations
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F li-etal-2024-truthreader
%X Document assistant chatbots are empowered with extensive capabilities by Large Language Models (LLMs) and have exhibited significant advancements. However, these systems may suffer from hallucinations that are difficult to verify in the context of given documents.Moreover, despite the emergence of products for document assistants, they either heavily rely on commercial LLM APIs or lack transparency in their technical implementations, leading to expensive usage costs and data privacy concerns. In this work, we introduce a fully open-source document assistant chatbot with reliable attribution, named TruthReader, utilizing adapted conversational retriever and LLMs. Our system enables the LLMs to generate answers with detailed inline citations, which can be attributed to the original document paragraphs, facilitating the verification of the factual consistency of the generated text. To further adapt the generative model, we develop a comprehensive pipeline consisting of data construction and model optimization processes.This pipeline equips the LLMs with the necessary capabilities to generate accurate answers, produce reliable citations, and refuse unanswerable questions. Our codebase, data and models are released, and the video demonstration of our system is available at https://youtu.be/RYVt3itzUQM.
%U https://aclanthology.org/2024.emnlp-demo.10
%P 89-100
Markdown (Informal)
[TruthReader: Towards Trustworthy Document Assistant Chatbot with Reliable Attribution](https://aclanthology.org/2024.emnlp-demo.10) (Li et al., EMNLP 2024)
ACL
- Dongfang Li, Xinshuo Hu, Zetian Sun, Baotian Hu, Shaolin Ye, Zifei Shan, Qian Chen, and Min Zhang. 2024. TruthReader: Towards Trustworthy Document Assistant Chatbot with Reliable Attribution. In Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing: System Demonstrations, pages 89–100, Miami, Florida, USA. Association for Computational Linguistics.