@inproceedings{landes-etal-2023-hospital,
title = "Hospital Discharge Summarization Data Provenance",
author = "Landes, Paul and
Chaise, Aaron and
Patel, Kunal and
Huang, Sean and
Di Eugenio, Barbara",
editor = "Demner-fushman, Dina and
Ananiadou, Sophia and
Cohen, Kevin",
booktitle = "The 22nd Workshop on Biomedical Natural Language Processing and BioNLP Shared Tasks",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.bionlp-1.41",
doi = "10.18653/v1/2023.bionlp-1.41",
pages = "439--448",
abstract = "Summarization of medical notes has been studied for decades with hospital discharge summaries garnering recent interest in the research community. While methods for summarizing these notes have been the focus, there has been little work in understanding the feasibility of this task. We believe this effort is warranted given the notes{'} length and complexity, and that they are often riddled with poorly formatted structured data and redundancy in copy and pasted text. In this work, we investigate the feasibility of the summarization task by finding the origin, or data provenance, of the discharge summary{'}s source text. As a motivation to understanding the data challenges of the summarization task, we present DSProv, a new dataset of 51 hospital admissions annotated by clinical informatics physicians. The dataset is analyzed for semantics and the extent of copied text from human authored electronic health record (EHR) notes. We also present a novel unsupervised method of matching notes used in discharge summaries, and release our annotation dataset1 and source code to the community.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="landes-etal-2023-hospital">
<titleInfo>
<title>Hospital Discharge Summarization Data Provenance</title>
</titleInfo>
<name type="personal">
<namePart type="given">Paul</namePart>
<namePart type="family">Landes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aaron</namePart>
<namePart type="family">Chaise</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kunal</namePart>
<namePart type="family">Patel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sean</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Barbara</namePart>
<namePart type="family">Di Eugenio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>The 22nd Workshop on Biomedical Natural Language Processing and BioNLP Shared Tasks</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dina</namePart>
<namePart type="family">Demner-fushman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sophia</namePart>
<namePart type="family">Ananiadou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kevin</namePart>
<namePart type="family">Cohen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Summarization of medical notes has been studied for decades with hospital discharge summaries garnering recent interest in the research community. While methods for summarizing these notes have been the focus, there has been little work in understanding the feasibility of this task. We believe this effort is warranted given the notes’ length and complexity, and that they are often riddled with poorly formatted structured data and redundancy in copy and pasted text. In this work, we investigate the feasibility of the summarization task by finding the origin, or data provenance, of the discharge summary’s source text. As a motivation to understanding the data challenges of the summarization task, we present DSProv, a new dataset of 51 hospital admissions annotated by clinical informatics physicians. The dataset is analyzed for semantics and the extent of copied text from human authored electronic health record (EHR) notes. We also present a novel unsupervised method of matching notes used in discharge summaries, and release our annotation dataset1 and source code to the community.</abstract>
<identifier type="citekey">landes-etal-2023-hospital</identifier>
<identifier type="doi">10.18653/v1/2023.bionlp-1.41</identifier>
<location>
<url>https://aclanthology.org/2023.bionlp-1.41</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>439</start>
<end>448</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Hospital Discharge Summarization Data Provenance
%A Landes, Paul
%A Chaise, Aaron
%A Patel, Kunal
%A Huang, Sean
%A Di Eugenio, Barbara
%Y Demner-fushman, Dina
%Y Ananiadou, Sophia
%Y Cohen, Kevin
%S The 22nd Workshop on Biomedical Natural Language Processing and BioNLP Shared Tasks
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F landes-etal-2023-hospital
%X Summarization of medical notes has been studied for decades with hospital discharge summaries garnering recent interest in the research community. While methods for summarizing these notes have been the focus, there has been little work in understanding the feasibility of this task. We believe this effort is warranted given the notes’ length and complexity, and that they are often riddled with poorly formatted structured data and redundancy in copy and pasted text. In this work, we investigate the feasibility of the summarization task by finding the origin, or data provenance, of the discharge summary’s source text. As a motivation to understanding the data challenges of the summarization task, we present DSProv, a new dataset of 51 hospital admissions annotated by clinical informatics physicians. The dataset is analyzed for semantics and the extent of copied text from human authored electronic health record (EHR) notes. We also present a novel unsupervised method of matching notes used in discharge summaries, and release our annotation dataset1 and source code to the community.
%R 10.18653/v1/2023.bionlp-1.41
%U https://aclanthology.org/2023.bionlp-1.41
%U https://doi.org/10.18653/v1/2023.bionlp-1.41
%P 439-448
Markdown (Informal)
[Hospital Discharge Summarization Data Provenance](https://aclanthology.org/2023.bionlp-1.41) (Landes et al., BioNLP 2023)
ACL
- Paul Landes, Aaron Chaise, Kunal Patel, Sean Huang, and Barbara Di Eugenio. 2023. Hospital Discharge Summarization Data Provenance. In The 22nd Workshop on Biomedical Natural Language Processing and BioNLP Shared Tasks, pages 439–448, Toronto, Canada. Association for Computational Linguistics.