@inproceedings{jana-biemann-2021-investigation,
title = "An Investigation towards Differentially Private Sequence Tagging in a Federated Framework",
author = "Jana, Abhik and
Biemann, Chris",
editor = "Feyisetan, Oluwaseyi and
Ghanavati, Sepideh and
Malmasi, Shervin and
Thaine, Patricia",
booktitle = "Proceedings of the Third Workshop on Privacy in Natural Language Processing",
month = jun,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.privatenlp-1.4",
doi = "10.18653/v1/2021.privatenlp-1.4",
pages = "30--35",
abstract = "To build machine learning-based applications for sensitive domains like medical, legal, etc. where the digitized text contains private information, anonymization of text is required for preserving privacy. Sequence tagging, e.g. as done in Named Entity Recognition (NER) can help to detect private information. However, to train sequence tagging models, a sufficient amount of labeled data are required but for privacy-sensitive domains, such labeled data also can not be shared directly. In this paper, we investigate the applicability of a privacy-preserving framework for sequence tagging tasks, specifically NER. Hence, we analyze a framework for the NER task, which incorporates two levels of privacy protection. Firstly, we deploy a federated learning (FL) framework where the labeled data are not shared with the centralized server as well as the peer clients. Secondly, we apply differential privacy (DP) while the models are being trained in each client instance. While both privacy measures are suitable for privacy-aware models, their combination results in unstable models. To our knowledge, this is the first study of its kind on privacy-aware sequence tagging models.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="jana-biemann-2021-investigation">
<titleInfo>
<title>An Investigation towards Differentially Private Sequence Tagging in a Federated Framework</title>
</titleInfo>
<name type="personal">
<namePart type="given">Abhik</namePart>
<namePart type="family">Jana</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chris</namePart>
<namePart type="family">Biemann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Third Workshop on Privacy in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Oluwaseyi</namePart>
<namePart type="family">Feyisetan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sepideh</namePart>
<namePart type="family">Ghanavati</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shervin</namePart>
<namePart type="family">Malmasi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Patricia</namePart>
<namePart type="family">Thaine</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>To build machine learning-based applications for sensitive domains like medical, legal, etc. where the digitized text contains private information, anonymization of text is required for preserving privacy. Sequence tagging, e.g. as done in Named Entity Recognition (NER) can help to detect private information. However, to train sequence tagging models, a sufficient amount of labeled data are required but for privacy-sensitive domains, such labeled data also can not be shared directly. In this paper, we investigate the applicability of a privacy-preserving framework for sequence tagging tasks, specifically NER. Hence, we analyze a framework for the NER task, which incorporates two levels of privacy protection. Firstly, we deploy a federated learning (FL) framework where the labeled data are not shared with the centralized server as well as the peer clients. Secondly, we apply differential privacy (DP) while the models are being trained in each client instance. While both privacy measures are suitable for privacy-aware models, their combination results in unstable models. To our knowledge, this is the first study of its kind on privacy-aware sequence tagging models.</abstract>
<identifier type="citekey">jana-biemann-2021-investigation</identifier>
<identifier type="doi">10.18653/v1/2021.privatenlp-1.4</identifier>
<location>
<url>https://aclanthology.org/2021.privatenlp-1.4</url>
</location>
<part>
<date>2021-06</date>
<extent unit="page">
<start>30</start>
<end>35</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T An Investigation towards Differentially Private Sequence Tagging in a Federated Framework
%A Jana, Abhik
%A Biemann, Chris
%Y Feyisetan, Oluwaseyi
%Y Ghanavati, Sepideh
%Y Malmasi, Shervin
%Y Thaine, Patricia
%S Proceedings of the Third Workshop on Privacy in Natural Language Processing
%D 2021
%8 June
%I Association for Computational Linguistics
%C Online
%F jana-biemann-2021-investigation
%X To build machine learning-based applications for sensitive domains like medical, legal, etc. where the digitized text contains private information, anonymization of text is required for preserving privacy. Sequence tagging, e.g. as done in Named Entity Recognition (NER) can help to detect private information. However, to train sequence tagging models, a sufficient amount of labeled data are required but for privacy-sensitive domains, such labeled data also can not be shared directly. In this paper, we investigate the applicability of a privacy-preserving framework for sequence tagging tasks, specifically NER. Hence, we analyze a framework for the NER task, which incorporates two levels of privacy protection. Firstly, we deploy a federated learning (FL) framework where the labeled data are not shared with the centralized server as well as the peer clients. Secondly, we apply differential privacy (DP) while the models are being trained in each client instance. While both privacy measures are suitable for privacy-aware models, their combination results in unstable models. To our knowledge, this is the first study of its kind on privacy-aware sequence tagging models.
%R 10.18653/v1/2021.privatenlp-1.4
%U https://aclanthology.org/2021.privatenlp-1.4
%U https://doi.org/10.18653/v1/2021.privatenlp-1.4
%P 30-35
Markdown (Informal)
[An Investigation towards Differentially Private Sequence Tagging in a Federated Framework](https://aclanthology.org/2021.privatenlp-1.4) (Jana & Biemann, PrivateNLP 2021)
ACL