@inproceedings{anaokar-etal-2025-halludetect,
title = "{H}allu{D}etect: Detecting, Mitigating, and Benchmarking Hallucinations in Conversational Systems in the Legal Domain",
author = "Anaokar, Spandan and
Ganatra, Shrey and
Bhattacharyya, Swapnil and
Kashid, Harshvivek and
Nair, Shruthi N and
Sekhar, Reshma and
Manohar, Siddharth and
Hemrajani, Rahul and
Bhattacharyya, Pushpak",
editor = "Potdar, Saloni and
Rojas-Barahona, Lina and
Montella, Sebastien",
booktitle = "Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: Industry Track",
month = nov,
year = "2025",
address = "Suzhou (China)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.emnlp-industry.128/",
pages = "1822--1847",
ISBN = "979-8-89176-333-3",
abstract = "Large Language Models (LLMs) are widely used in industry but remain prone to hallucinations, limiting their reliability in critical applications. This work addresses hallucination reduction in consumer grievance chatbots built using LLaMA 3.1 8B Instruct, a compact model frequently used in industry. We develop **HalluDetect**, an LLM-based hallucination detection system that achieves an F1 score of **68.92{\%}** outperforming baseline detectors by **22.47{\%}**. Benchmarking five hallucination mitigation architectures, we find that out of them, AgentBot minimizes hallucinations to **0.4159** per turn while maintaining the highest token accuracy (**96.13{\%}**), making it the most effective mitigation strategy. Our findings provide a scalable framework for hallucination mitigation, demonstrating that optimized inference strategies can significantly improve factual accuracy."
}

<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="anaokar-etal-2025-halludetect">
    <titleInfo>
      <title>HalluDetect: Detecting, Mitigating, and Benchmarking Hallucinations in Conversational Systems in the Legal Domain</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Spandan</namePart>
      <namePart type="family">Anaokar</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Shrey</namePart>
      <namePart type="family">Ganatra</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Swapnil</namePart>
      <namePart type="family">Bhattacharyya</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Harshvivek</namePart>
      <namePart type="family">Kashid</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Shruthi</namePart>
      <namePart type="given">N</namePart>
      <namePart type="family">Nair</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Reshma</namePart>
      <namePart type="family">Sekhar</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Siddharth</namePart>
      <namePart type="family">Manohar</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Rahul</namePart>
      <namePart type="family">Hemrajani</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Pushpak</namePart>
      <namePart type="family">Bhattacharyya</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: Industry Track</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Saloni</namePart>
        <namePart type="family">Potdar</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Lina</namePart>
        <namePart type="family">Rojas-Barahona</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sebastien</namePart>
        <namePart type="family">Montella</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Suzhou (China)</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-333-3</identifier>
    </relatedItem>
    <abstract>Large Language Models (LLMs) are widely used in industry but remain prone to hallucinations, limiting their reliability in critical applications. This work addresses hallucination reduction in consumer grievance chatbots built using LLaMA 3.1 8B Instruct, a compact model frequently used in industry. We develop HalluDetect, an LLM-based hallucination detection system that achieves an F1 score of 68.92%, outperforming baseline detectors by 22.47%. Benchmarking five hallucination mitigation architectures, we find that AgentBot minimizes hallucinations to 0.4159 per turn while maintaining the highest token accuracy (96.13%), making it the most effective mitigation strategy. Our findings provide a scalable framework for hallucination mitigation, demonstrating that optimized inference strategies can significantly improve factual accuracy.</abstract>
<identifier type="citekey">anaokar-etal-2025-halludetect</identifier>
<location>
<url>https://aclanthology.org/2025.emnlp-industry.128/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>1822</start>
<end>1847</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T HalluDetect: Detecting, Mitigating, and Benchmarking Hallucinations in Conversational Systems in the Legal Domain
%A Anaokar, Spandan
%A Ganatra, Shrey
%A Bhattacharyya, Swapnil
%A Kashid, Harshvivek
%A Nair, Shruthi N.
%A Sekhar, Reshma
%A Manohar, Siddharth
%A Hemrajani, Rahul
%A Bhattacharyya, Pushpak
%Y Potdar, Saloni
%Y Rojas-Barahona, Lina
%Y Montella, Sebastien
%S Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: Industry Track
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou (China)
%@ 979-8-89176-333-3
%F anaokar-etal-2025-halludetect
%X Large Language Models (LLMs) are widely used in industry but remain prone to hallucinations, limiting their reliability in critical applications. This work addresses hallucination reduction in consumer grievance chatbots built using LLaMA 3.1 8B Instruct, a compact model frequently used in industry. We develop HalluDetect, an LLM-based hallucination detection system that achieves an F1 score of 68.92%, outperforming baseline detectors by 22.47%. Benchmarking five hallucination mitigation architectures, we find that AgentBot minimizes hallucinations to 0.4159 per turn while maintaining the highest token accuracy (96.13%), making it the most effective mitigation strategy. Our findings provide a scalable framework for hallucination mitigation, demonstrating that optimized inference strategies can significantly improve factual accuracy.
%U https://aclanthology.org/2025.emnlp-industry.128/
%P 1822-1847

Markdown (Informal)
[HalluDetect: Detecting, Mitigating, and Benchmarking Hallucinations in Conversational Systems in the Legal Domain](https://aclanthology.org/2025.emnlp-industry.128/) (Anaokar et al., EMNLP 2025)

ACL
Spandan Anaokar, Shrey Ganatra, Swapnil Bhattacharyya, Harshvivek Kashid, Shruthi N Nair, Reshma Sekhar, Siddharth Manohar, Rahul Hemrajani, and Pushpak Bhattacharyya. 2025. HalluDetect: Detecting, Mitigating, and Benchmarking Hallucinations in Conversational Systems in the Legal Domain. In Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: Industry Track, pages 1822–1847, Suzhou (China). Association for Computational Linguistics.