@inproceedings{melamud-shivade-2019-towards,
title = "Towards Automatic Generation of Shareable Synthetic Clinical Notes Using Neural Language Models",
author = "Melamud, Oren and
Shivade, Chaitanya",
editor = "Rumshisky, Anna and
Roberts, Kirk and
Bethard, Steven and
Naumann, Tristan",
booktitle = "Proceedings of the 2nd Clinical Natural Language Processing Workshop",
month = jun,
year = "2019",
address = "Minneapolis, Minnesota, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W19-1905",
doi = "10.18653/v1/W19-1905",
pages = "35--45",
abstract = "Large-scale clinical data is invaluable to driving many computational scientific advances today. However, understandable concerns regarding patient privacy hinder the open dissemination of such data and give rise to suboptimal siloed research. De-identification methods attempt to address these concerns but were shown to be susceptible to adversarial attacks. In this work, we focus on the vast amounts of unstructured natural language data stored in clinical notes and propose to automatically generate synthetic clinical notes that are more amenable to sharing using generative models trained on real de-identified records. To evaluate the merit of such notes, we measure both their privacy preservation properties as well as utility in training clinical NLP models. Experiments using neural language models yield notes whose utility is close to that of the real ones in some clinical NLP tasks, yet leave ample room for future improvements.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="melamud-shivade-2019-towards">
<titleInfo>
<title>Towards Automatic Generation of Shareable Synthetic Clinical Notes Using Neural Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Oren</namePart>
<namePart type="family">Melamud</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chaitanya</namePart>
<namePart type="family">Shivade</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2nd Clinical Natural Language Processing Workshop</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Rumshisky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kirk</namePart>
<namePart type="family">Roberts</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Bethard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tristan</namePart>
<namePart type="family">Naumann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Minneapolis, Minnesota, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Large-scale clinical data is invaluable to driving many computational scientific advances today. However, understandable concerns regarding patient privacy hinder the open dissemination of such data and give rise to suboptimal siloed research. De-identification methods attempt to address these concerns but were shown to be susceptible to adversarial attacks. In this work, we focus on the vast amounts of unstructured natural language data stored in clinical notes and propose to automatically generate synthetic clinical notes that are more amenable to sharing using generative models trained on real de-identified records. To evaluate the merit of such notes, we measure both their privacy preservation properties as well as utility in training clinical NLP models. Experiments using neural language models yield notes whose utility is close to that of the real ones in some clinical NLP tasks, yet leave ample room for future improvements.</abstract>
<identifier type="citekey">melamud-shivade-2019-towards</identifier>
<identifier type="doi">10.18653/v1/W19-1905</identifier>
<location>
<url>https://aclanthology.org/W19-1905</url>
</location>
<part>
<date>2019-06</date>
<extent unit="page">
<start>35</start>
<end>45</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Towards Automatic Generation of Shareable Synthetic Clinical Notes Using Neural Language Models
%A Melamud, Oren
%A Shivade, Chaitanya
%Y Rumshisky, Anna
%Y Roberts, Kirk
%Y Bethard, Steven
%Y Naumann, Tristan
%S Proceedings of the 2nd Clinical Natural Language Processing Workshop
%D 2019
%8 June
%I Association for Computational Linguistics
%C Minneapolis, Minnesota, USA
%F melamud-shivade-2019-towards
%X Large-scale clinical data is invaluable to driving many computational scientific advances today. However, understandable concerns regarding patient privacy hinder the open dissemination of such data and give rise to suboptimal siloed research. De-identification methods attempt to address these concerns but were shown to be susceptible to adversarial attacks. In this work, we focus on the vast amounts of unstructured natural language data stored in clinical notes and propose to automatically generate synthetic clinical notes that are more amenable to sharing using generative models trained on real de-identified records. To evaluate the merit of such notes, we measure both their privacy preservation properties as well as utility in training clinical NLP models. Experiments using neural language models yield notes whose utility is close to that of the real ones in some clinical NLP tasks, yet leave ample room for future improvements.
%R 10.18653/v1/W19-1905
%U https://aclanthology.org/W19-1905
%U https://doi.org/10.18653/v1/W19-1905
%P 35-45
Markdown (Informal)
[Towards Automatic Generation of Shareable Synthetic Clinical Notes Using Neural Language Models](https://aclanthology.org/W19-1905) (Melamud & Shivade, ClinicalNLP 2019)
ACL