@inproceedings{khandelwal-etal-2020-surfacing,
title = "Surfacing Privacy Settings Using Semantic Matching",
author = "Khandelwal, Rishabh and
Nayak, Asmit and
Yao, Yao and
Fawaz, Kassem",
editor = "Feyisetan, Oluwaseyi and
Ghanavati, Sepideh and
Malmasi, Shervin and
Thaine, Patricia",
booktitle = "Proceedings of the Second Workshop on Privacy in NLP",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.privatenlp-1.4/",
doi = "10.18653/v1/2020.privatenlp-1.4",
pages = "28--38",
abstract = "Online services utilize privacy settings to provide users with control over their data. However, these privacy settings are often hard to locate, causing the user to rely on provider-chosen default values. In this work, we train privacy-settings-centric encoders and leverage them to create an interface that allows users to search for privacy settings using free-form queries. In order to achieve this goal, we create a custom Semantic Similarity dataset, which consists of real user queries covering various privacy settings. We then use this dataset to fine-tune a state of the art encoder. Using this fine-tuned encoder, we perform semantic matching between the user queries and the privacy settings to retrieve the most relevant setting. Finally, we also use the encoder to generate embeddings of privacy settings from the top 100 websites and perform unsupervised clustering to learn about the online privacy settings types. We find that the most common type of privacy settings are {\textquoteleft}Personalization' and {\textquoteleft}Notifications', with coverage of 35.8{\%} and 34.4{\%}, respectively, in our dataset."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="khandelwal-etal-2020-surfacing">
<titleInfo>
<title>Surfacing Privacy Settings Using Semantic Matching</title>
</titleInfo>
<name type="personal">
<namePart type="given">Rishabh</namePart>
<namePart type="family">Khandelwal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asmit</namePart>
<namePart type="family">Nayak</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yao</namePart>
<namePart type="family">Yao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kassem</namePart>
<namePart type="family">Fawaz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Second Workshop on Privacy in NLP</title>
</titleInfo>
<name type="personal">
<namePart type="given">Oluwaseyi</namePart>
<namePart type="family">Feyisetan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sepideh</namePart>
<namePart type="family">Ghanavati</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shervin</namePart>
<namePart type="family">Malmasi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Patricia</namePart>
<namePart type="family">Thaine</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Online services utilize privacy settings to provide users with control over their data. However, these privacy settings are often hard to locate, causing the user to rely on provider-chosen default values. In this work, we train privacy-settings-centric encoders and leverage them to create an interface that allows users to search for privacy settings using free-form queries. In order to achieve this goal, we create a custom Semantic Similarity dataset, which consists of real user queries covering various privacy settings. We then use this dataset to fine-tune a state of the art encoder. Using this fine-tuned encoder, we perform semantic matching between the user queries and the privacy settings to retrieve the most relevant setting. Finally, we also use the encoder to generate embeddings of privacy settings from the top 100 websites and perform unsupervised clustering to learn about the online privacy settings types. We find that the most common type of privacy settings are ‘Personalization’ and ‘Notifications’, with coverage of 35.8% and 34.4%, respectively, in our dataset.</abstract>
<identifier type="citekey">khandelwal-etal-2020-surfacing</identifier>
<identifier type="doi">10.18653/v1/2020.privatenlp-1.4</identifier>
<location>
<url>https://aclanthology.org/2020.privatenlp-1.4/</url>
</location>
<part>
<date>2020-11</date>
<extent unit="page">
<start>28</start>
<end>38</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Surfacing Privacy Settings Using Semantic Matching
%A Khandelwal, Rishabh
%A Nayak, Asmit
%A Yao, Yao
%A Fawaz, Kassem
%Y Feyisetan, Oluwaseyi
%Y Ghanavati, Sepideh
%Y Malmasi, Shervin
%Y Thaine, Patricia
%S Proceedings of the Second Workshop on Privacy in NLP
%D 2020
%8 November
%I Association for Computational Linguistics
%C Online
%F khandelwal-etal-2020-surfacing
%X Online services utilize privacy settings to provide users with control over their data. However, these privacy settings are often hard to locate, causing the user to rely on provider-chosen default values. In this work, we train privacy-settings-centric encoders and leverage them to create an interface that allows users to search for privacy settings using free-form queries. In order to achieve this goal, we create a custom Semantic Similarity dataset, which consists of real user queries covering various privacy settings. We then use this dataset to fine-tune a state of the art encoder. Using this fine-tuned encoder, we perform semantic matching between the user queries and the privacy settings to retrieve the most relevant setting. Finally, we also use the encoder to generate embeddings of privacy settings from the top 100 websites and perform unsupervised clustering to learn about the online privacy settings types. We find that the most common type of privacy settings are ‘Personalization’ and ‘Notifications’, with coverage of 35.8% and 34.4%, respectively, in our dataset.
%R 10.18653/v1/2020.privatenlp-1.4
%U https://aclanthology.org/2020.privatenlp-1.4/
%U https://doi.org/10.18653/v1/2020.privatenlp-1.4
%P 28-38
Markdown (Informal)
[Surfacing Privacy Settings Using Semantic Matching](https://aclanthology.org/2020.privatenlp-1.4/) (Khandelwal et al., PrivateNLP 2020)
ACL