@inproceedings{ramrakhiyani-etal-2025-queryshield,
    title     = {{Q}uery{S}hield: A Platform to Mitigate Enterprise Data Leakage in Queries to External {LLM}s},
    author    = {Ramrakhiyani, Nitin and
                 Myalil, Delton and
                 Pawar, Sachin and
                 Apte, Manoj and
                 A, Rajan M and
                 Saglani, Divyesh and
                 Shaik, Imtiyazuddin},
    editor    = {Chen, Weizhu and
                 Yang, Yi and
                 Kachuee, Mohammad and
                 Fu, Xue-Yong},
    booktitle = {Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 3: Industry Track)},
    month     = apr,
    year      = {2025},
    address   = {Albuquerque, New Mexico},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.naacl-industry.30/},
    doi       = {10.18653/v1/2025.naacl-industry.30},
    pages     = {358--369},
    isbn      = {979-8-89176-194-0},
    abstract  = {Unrestricted access to external Large Language Models (LLM) based services like ChatGPT and Gemini can lead to potential data leakages, especially for large enterprises providing products and services to clients that require legal confidentiality guarantees. However, a blanket restriction on such services is not ideal as these LLMs boost employee productivity. Our goal is to build a solution that enables enterprise employees to query such external LLMs, without leaking confidential internal and client information. In this paper, we propose QueryShield - a platform that enterprises can use to interact with external LLMs without leaking data through queries. It detects if a query leaks data, and rephrases it to minimize data leakage while limiting the impact to its semantics. We construct a dataset of 1500 queries and manually annotate them for their sensitivity labels and their low sensitivity rephrased versions. We fine-tune a set of lightweight model candidates using this dataset and evaluate them using multiple metrics including one we propose specific to this problem.}
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ramrakhiyani-etal-2025-queryshield">
<titleInfo>
<title>QueryShield: A Platform to Mitigate Enterprise Data Leakage in Queries to External LLMs</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nitin</namePart>
<namePart type="family">Ramrakhiyani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Delton</namePart>
<namePart type="family">Myalil</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sachin</namePart>
<namePart type="family">Pawar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Manoj</namePart>
<namePart type="family">Apte</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rajan</namePart>
<namePart type="given">M</namePart>
<namePart type="family">A</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Divyesh</namePart>
<namePart type="family">Saglani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Imtiyazuddin</namePart>
<namePart type="family">Shaik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 3: Industry Track)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Weizhu</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yi</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="family">Kachuee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xue-Yong</namePart>
<namePart type="family">Fu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Albuquerque, New Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-194-0</identifier>
</relatedItem>
<abstract>Unrestricted access to external Large Language Models (LLM) based services like ChatGPT and Gemini can lead to potential data leakages, especially for large enterprises providing products and services to clients that require legal confidentiality guarantees. However, a blanket restriction on such services is not ideal as these LLMs boost employee productivity. Our goal is to build a solution that enables enterprise employees to query such external LLMs, without leaking confidential internal and client information. In this paper, we propose QueryShield - a platform that enterprises can use to interact with external LLMs without leaking data through queries. It detects if a query leaks data, and rephrases it to minimize data leakage while limiting the impact to its semantics. We construct a dataset of 1500 queries and manually annotate them for their sensitivity labels and their low sensitivity rephrased versions. We fine-tune a set of lightweight model candidates using this dataset and evaluate them using multiple metrics including one we propose specific to this problem.</abstract>
<identifier type="citekey">ramrakhiyani-etal-2025-queryshield</identifier>
<identifier type="doi">10.18653/v1/2025.naacl-industry.30</identifier>
<location>
<url>https://aclanthology.org/2025.naacl-industry.30/</url>
</location>
<part>
<date>2025-04</date>
<extent unit="page">
<start>358</start>
<end>369</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T QueryShield: A Platform to Mitigate Enterprise Data Leakage in Queries to External LLMs
%A Ramrakhiyani, Nitin
%A Myalil, Delton
%A Pawar, Sachin
%A Apte, Manoj
%A A, Rajan M.
%A Saglani, Divyesh
%A Shaik, Imtiyazuddin
%Y Chen, Weizhu
%Y Yang, Yi
%Y Kachuee, Mohammad
%Y Fu, Xue-Yong
%S Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 3: Industry Track)
%D 2025
%8 April
%I Association for Computational Linguistics
%C Albuquerque, New Mexico
%@ 979-8-89176-194-0
%F ramrakhiyani-etal-2025-queryshield
%X Unrestricted access to external Large Language Models (LLM) based services like ChatGPT and Gemini can lead to potential data leakages, especially for large enterprises providing products and services to clients that require legal confidentiality guarantees. However, a blanket restriction on such services is not ideal as these LLMs boost employee productivity. Our goal is to build a solution that enables enterprise employees to query such external LLMs, without leaking confidential internal and client information. In this paper, we propose QueryShield - a platform that enterprises can use to interact with external LLMs without leaking data through queries. It detects if a query leaks data, and rephrases it to minimize data leakage while limiting the impact to its semantics. We construct a dataset of 1500 queries and manually annotate them for their sensitivity labels and their low sensitivity rephrased versions. We fine-tune a set of lightweight model candidates using this dataset and evaluate them using multiple metrics including one we propose specific to this problem.
%R 10.18653/v1/2025.naacl-industry.30
%U https://aclanthology.org/2025.naacl-industry.30/
%U https://doi.org/10.18653/v1/2025.naacl-industry.30
%P 358-369
Markdown (Informal)
[QueryShield: A Platform to Mitigate Enterprise Data Leakage in Queries to External LLMs](https://aclanthology.org/2025.naacl-industry.30/) (Ramrakhiyani et al., NAACL 2025)
ACL
- Nitin Ramrakhiyani, Delton Myalil, Sachin Pawar, Manoj Apte, Rajan M A, Divyesh Saglani, and Imtiyazuddin Shaik. 2025. QueryShield: A Platform to Mitigate Enterprise Data Leakage in Queries to External LLMs. In Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 3: Industry Track), pages 358–369, Albuquerque, New Mexico. Association for Computational Linguistics.