@inproceedings{amin-nejad-etal-2020-exploring,
title = "Exploring Transformer Text Generation for Medical Dataset Augmentation",
author = "Amin-Nejad, Ali and
Ive, Julia and
Velupillai, Sumithra",
editor = "Calzolari, Nicoletta and
B{\'e}chet, Fr{\'e}d{\'e}ric and
Blache, Philippe and
Choukri, Khalid and
Cieri, Christopher and
Declerck, Thierry and
Goggi, Sara and
Isahara, Hitoshi and
Maegaard, Bente and
Mariani, Joseph and
Mazo, H{\'e}l{\`e}ne and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Twelfth Language Resources and Evaluation Conference",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2020.lrec-1.578/",
pages = "4699--4708",
language = "eng",
ISBN = "979-10-95546-34-4",
abstract = "Natural Language Processing (NLP) can help unlock the vast troves of unstructured data in clinical text and thus improve healthcare research. However, a big barrier to developments in this field is data access due to patient confidentiality which prohibits the sharing of this data, resulting in small, fragmented and sequestered openly available datasets. Since NLP model development requires large quantities of data, we aim to help side-step this roadblock by exploring the usage of Natural Language Generation in augmenting datasets such that they can be used for NLP model development on downstream clinically relevant tasks. We propose a methodology guiding the generation with structured patient information in a sequence-to-sequence manner. We experiment with state-of-the-art Transformer models and demonstrate that our augmented dataset is capable of beating our baselines on a downstream classification task. Finally, we also create a user interface and release the scripts to train generation models to stimulate further research in this area."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="amin-nejad-etal-2020-exploring">
<titleInfo>
<title>Exploring Transformer Text Generation for Medical Dataset Augmentation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ali</namePart>
<namePart type="family">Amin-Nejad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Julia</namePart>
<namePart type="family">Ive</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sumithra</namePart>
<namePart type="family">Velupillai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Twelfth Language Resources and Evaluation Conference</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Frédéric</namePart>
<namePart type="family">Béchet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philippe</namePart>
<namePart type="family">Blache</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christopher</namePart>
<namePart type="family">Cieri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Goggi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hitoshi</namePart>
<namePart type="family">Isahara</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hélène</namePart>
<namePart type="family">Mazo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-34-4</identifier>
</relatedItem>
<abstract>Natural Language Processing (NLP) can help unlock the vast troves of unstructured data in clinical text and thus improve healthcare research. However, a big barrier to developments in this field is data access due to patient confidentiality which prohibits the sharing of this data, resulting in small, fragmented and sequestered openly available datasets. Since NLP model development requires large quantities of data, we aim to help side-step this roadblock by exploring the usage of Natural Language Generation in augmenting datasets such that they can be used for NLP model development on downstream clinically relevant tasks. We propose a methodology guiding the generation with structured patient information in a sequence-to-sequence manner. We experiment with state-of-the-art Transformer models and demonstrate that our augmented dataset is capable of beating our baselines on a downstream classification task. Finally, we also create a user interface and release the scripts to train generation models to stimulate further research in this area.</abstract>
<identifier type="citekey">amin-nejad-etal-2020-exploring</identifier>
<location>
<url>https://aclanthology.org/2020.lrec-1.578/</url>
</location>
<part>
<date>2020-05</date>
<extent unit="page">
<start>4699</start>
<end>4708</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Exploring Transformer Text Generation for Medical Dataset Augmentation
%A Amin-Nejad, Ali
%A Ive, Julia
%A Velupillai, Sumithra
%Y Calzolari, Nicoletta
%Y Béchet, Frédéric
%Y Blache, Philippe
%Y Choukri, Khalid
%Y Cieri, Christopher
%Y Declerck, Thierry
%Y Goggi, Sara
%Y Isahara, Hitoshi
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Mazo, Hélène
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Twelfth Language Resources and Evaluation Conference
%D 2020
%8 May
%I European Language Resources Association
%C Marseille, France
%@ 979-10-95546-34-4
%G eng
%F amin-nejad-etal-2020-exploring
%X Natural Language Processing (NLP) can help unlock the vast troves of unstructured data in clinical text and thus improve healthcare research. However, a big barrier to developments in this field is data access due to patient confidentiality which prohibits the sharing of this data, resulting in small, fragmented and sequestered openly available datasets. Since NLP model development requires large quantities of data, we aim to help side-step this roadblock by exploring the usage of Natural Language Generation in augmenting datasets such that they can be used for NLP model development on downstream clinically relevant tasks. We propose a methodology guiding the generation with structured patient information in a sequence-to-sequence manner. We experiment with state-of-the-art Transformer models and demonstrate that our augmented dataset is capable of beating our baselines on a downstream classification task. Finally, we also create a user interface and release the scripts to train generation models to stimulate further research in this area.
%U https://aclanthology.org/2020.lrec-1.578/
%P 4699-4708
Markdown (Informal)
[Exploring Transformer Text Generation for Medical Dataset Augmentation](https://aclanthology.org/2020.lrec-1.578/) (Amin-Nejad et al., LREC 2020)
ACL