@inproceedings{kim-etal-2022-toward,
title = "Toward Privacy-preserving Text Embedding Similarity with Homomorphic Encryption",
author = "Kim, Donggyu and
Lee, Garam and
Oh, Sungwoo",
editor = "Chen, Chung-Chi and
Huang, Hen-Hsen and
Takamura, Hiroya and
Chen, Hsin-Hsi",
booktitle = "Proceedings of the Fourth Workshop on Financial Technology and Natural Language Processing (FinNLP)",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates (Hybrid)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.finnlp-1.4",
doi = "10.18653/v1/2022.finnlp-1.4",
pages = "25--36",
abstract = "Text embedding is an essential component to build efficient natural language applications based on text similarities such as search engines and chatbots. Certain industries like finance and healthcare demand strict privacy-preserving conditions that user{'}s data should not be exposed to any potential malicious users even including service providers. From a privacy standpoint, text embeddings seem impossible to be interpreted but there is still a privacy risk that they can be recovered to original texts through inversion attacks. To satisfy such privacy requirements, in this paper, we study a Homomorphic Encryption (HE) based text similarity inference. To validate our method, we perform extensive experiments on two vital text similarity tasks. Through text embedding inversion tests, we prove that the benchmark datasets are vulnerable to inversion attacks and another privacy preserving approach, dχ-privacy, a relaxed version of Local Differential Privacy method fails to prevent them. We show that our approach preserves the performance of models compared to that the baseline has degradation up to 10{\%} of scores for the minimum security.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kim-etal-2022-toward">
<titleInfo>
<title>Toward Privacy-preserving Text Embedding Similarity with Homomorphic Encryption</title>
</titleInfo>
<name type="personal">
<namePart type="given">Donggyu</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Garam</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sungwoo</namePart>
<namePart type="family">Oh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fourth Workshop on Financial Technology and Natural Language Processing (FinNLP)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Chung-Chi</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hen-Hsen</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hiroya</namePart>
<namePart type="family">Takamura</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hsin-Hsi</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, United Arab Emirates (Hybrid)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Text embedding is an essential component to build efficient natural language applications based on text similarities such as search engines and chatbots. Certain industries like finance and healthcare demand strict privacy-preserving conditions that user’s data should not be exposed to any potential malicious users even including service providers. From a privacy standpoint, text embeddings seem impossible to be interpreted but there is still a privacy risk that they can be recovered to original texts through inversion attacks. To satisfy such privacy requirements, in this paper, we study a Homomorphic Encryption (HE) based text similarity inference. To validate our method, we perform extensive experiments on two vital text similarity tasks. Through text embedding inversion tests, we prove that the benchmark datasets are vulnerable to inversion attacks and another privacy preserving approach, dχ-privacy, a relaxed version of Local Differential Privacy method fails to prevent them. We show that our approach preserves the performance of models compared to that the baseline has degradation up to 10% of scores for the minimum security.</abstract>
<identifier type="citekey">kim-etal-2022-toward</identifier>
<identifier type="doi">10.18653/v1/2022.finnlp-1.4</identifier>
<location>
<url>https://aclanthology.org/2022.finnlp-1.4</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>25</start>
<end>36</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Toward Privacy-preserving Text Embedding Similarity with Homomorphic Encryption
%A Kim, Donggyu
%A Lee, Garam
%A Oh, Sungwoo
%Y Chen, Chung-Chi
%Y Huang, Hen-Hsen
%Y Takamura, Hiroya
%Y Chen, Hsin-Hsi
%S Proceedings of the Fourth Workshop on Financial Technology and Natural Language Processing (FinNLP)
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates (Hybrid)
%F kim-etal-2022-toward
%X Text embedding is an essential component to build efficient natural language applications based on text similarities such as search engines and chatbots. Certain industries like finance and healthcare demand strict privacy-preserving conditions that user’s data should not be exposed to any potential malicious users even including service providers. From a privacy standpoint, text embeddings seem impossible to be interpreted but there is still a privacy risk that they can be recovered to original texts through inversion attacks. To satisfy such privacy requirements, in this paper, we study a Homomorphic Encryption (HE) based text similarity inference. To validate our method, we perform extensive experiments on two vital text similarity tasks. Through text embedding inversion tests, we prove that the benchmark datasets are vulnerable to inversion attacks and another privacy preserving approach, dχ-privacy, a relaxed version of Local Differential Privacy method fails to prevent them. We show that our approach preserves the performance of models compared to that the baseline has degradation up to 10% of scores for the minimum security.
%R 10.18653/v1/2022.finnlp-1.4
%U https://aclanthology.org/2022.finnlp-1.4
%U https://doi.org/10.18653/v1/2022.finnlp-1.4
%P 25-36
Markdown (Informal)
[Toward Privacy-preserving Text Embedding Similarity with Homomorphic Encryption](https://aclanthology.org/2022.finnlp-1.4) (Kim et al., FinNLP 2022)
ACL