@inproceedings{bokaie-hosseini-etal-2020-identifying,
title = "Identifying and Classifying Third-party Entities in Natural Language Privacy Policies",
author = "Bokaie Hosseini, Mitra and
K C, Pragyan and
Reyes, Irwin and
Egelman, Serge",
editor = "Feyisetan, Oluwaseyi and
Ghanavati, Sepideh and
Malmasi, Shervin and
Thaine, Patricia",
booktitle = "Proceedings of the Second Workshop on Privacy in NLP",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.privatenlp-1.3",
doi = "10.18653/v1/2020.privatenlp-1.3",
pages = "18--27",
abstract = "App developers often raise revenue by contracting with third party ad networks, which serve targeted ads to end-users. To this end, a free app may collect data about its users and share it with advertising companies for targeting purposes. Regulations such as General Data Protection Regulation (GDPR) require transparency with respect to the recipients (or categories of recipients) of user data. These regulations call for app developers to have privacy policies that disclose those third party recipients of user data. Privacy policies provide users transparency into what data an app will access, collect, shared, and retain. Given the size of app marketplaces, verifying compliance with such regulations is a tedious task. This paper aims to develop an automated approach to extract and categorize third party data recipients (i.e., entities) declared in privacy policies. We analyze 100 privacy policies associated with most downloaded apps in the Google Play Store. We crowdsource the collection and annotation of app privacy policies to establish the ground truth with respect to third party entities. From this, we train various models to extract third party entities automatically. Our best model achieves average F1 score of 66{\%} when compared to crowdsourced annotations.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bokaie-hosseini-etal-2020-identifying">
<titleInfo>
<title>Identifying and Classifying Third-party Entities in Natural Language Privacy Policies</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mitra</namePart>
<namePart type="family">Bokaie Hosseini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pragyan</namePart>
<namePart type="family">K C</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Irwin</namePart>
<namePart type="family">Reyes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Serge</namePart>
<namePart type="family">Egelman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Second Workshop on Privacy in NLP</title>
</titleInfo>
<name type="personal">
<namePart type="given">Oluwaseyi</namePart>
<namePart type="family">Feyisetan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sepideh</namePart>
<namePart type="family">Ghanavati</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shervin</namePart>
<namePart type="family">Malmasi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Patricia</namePart>
<namePart type="family">Thaine</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>App developers often raise revenue by contracting with third party ad networks, which serve targeted ads to end-users. To this end, a free app may collect data about its users and share it with advertising companies for targeting purposes. Regulations such as General Data Protection Regulation (GDPR) require transparency with respect to the recipients (or categories of recipients) of user data. These regulations call for app developers to have privacy policies that disclose those third party recipients of user data. Privacy policies provide users transparency into what data an app will access, collect, shared, and retain. Given the size of app marketplaces, verifying compliance with such regulations is a tedious task. This paper aims to develop an automated approach to extract and categorize third party data recipients (i.e., entities) declared in privacy policies. We analyze 100 privacy policies associated with most downloaded apps in the Google Play Store. We crowdsource the collection and annotation of app privacy policies to establish the ground truth with respect to third party entities. From this, we train various models to extract third party entities automatically. Our best model achieves average F1 score of 66% when compared to crowdsourced annotations.</abstract>
<identifier type="citekey">bokaie-hosseini-etal-2020-identifying</identifier>
<identifier type="doi">10.18653/v1/2020.privatenlp-1.3</identifier>
<location>
<url>https://aclanthology.org/2020.privatenlp-1.3</url>
</location>
<part>
<date>2020-11</date>
<extent unit="page">
<start>18</start>
<end>27</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Identifying and Classifying Third-party Entities in Natural Language Privacy Policies
%A Bokaie Hosseini, Mitra
%A K C, Pragyan
%A Reyes, Irwin
%A Egelman, Serge
%Y Feyisetan, Oluwaseyi
%Y Ghanavati, Sepideh
%Y Malmasi, Shervin
%Y Thaine, Patricia
%S Proceedings of the Second Workshop on Privacy in NLP
%D 2020
%8 November
%I Association for Computational Linguistics
%C Online
%F bokaie-hosseini-etal-2020-identifying
%X App developers often raise revenue by contracting with third party ad networks, which serve targeted ads to end-users. To this end, a free app may collect data about its users and share it with advertising companies for targeting purposes. Regulations such as General Data Protection Regulation (GDPR) require transparency with respect to the recipients (or categories of recipients) of user data. These regulations call for app developers to have privacy policies that disclose those third party recipients of user data. Privacy policies provide users transparency into what data an app will access, collect, shared, and retain. Given the size of app marketplaces, verifying compliance with such regulations is a tedious task. This paper aims to develop an automated approach to extract and categorize third party data recipients (i.e., entities) declared in privacy policies. We analyze 100 privacy policies associated with most downloaded apps in the Google Play Store. We crowdsource the collection and annotation of app privacy policies to establish the ground truth with respect to third party entities. From this, we train various models to extract third party entities automatically. Our best model achieves average F1 score of 66% when compared to crowdsourced annotations.
%R 10.18653/v1/2020.privatenlp-1.3
%U https://aclanthology.org/2020.privatenlp-1.3
%U https://doi.org/10.18653/v1/2020.privatenlp-1.3
%P 18-27
Markdown (Informal)
[Identifying and Classifying Third-party Entities in Natural Language Privacy Policies](https://aclanthology.org/2020.privatenlp-1.3) (Bokaie Hosseini et al., PrivateNLP 2020)
ACL