@inproceedings{oh-etal-2024-use,
title = "How to use Language Models for Synthetic Text Generation in Cerebrovascular Disease-specific Medical Reports",
author = "Oh, Byoung-Doo and
Kim, Gi-Youn and
Kim, Chulho and
Kim, Yu-Seop",
editor = "Deshpande, Ameet and
Hwang, EunJeong and
Murahari, Vishvak and
Park, Joon Sung and
Yang, Diyi and
Sabharwal, Ashish and
Narasimhan, Karthik and
Kalyan, Ashwin",
booktitle = "Proceedings of the 1st Workshop on Personalization of Generative AI Systems (PERSONALIZE 2024)",
month = mar,
year = "2024",
address = "St. Julians, Malta",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.personalize-1.2",
pages = "10--17",
abstract = "The quantity and quality of data have a significant impact on the performance of artificial intelligence (AI). However, in the biomedical domain, data often contains sensitive information such as personal details, making it challenging to secure enough data for medical AI. Consequently, there is a growing interest in synthetic data generation for medical AI. However, research has primarily focused on medical images, with little given to text-based data such as medical records. Therefore, this study explores the application of language models (LMs) for synthetic text generation in low-resource domains like medical records. It compares the results of synthetic text generation based on different LMs. To achieve this, we focused on two criteria for LM-based synthetic text generation of medical records using two keywords entered by the user: 1) the impact of the LM{'}s knowledge, 2) the impact of the LM{'}s size. Additionally, we objectively evaluated the generated synthetic text, including representative metrics such as BLUE and ROUGE, along with clinician{'}s evaluations.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="oh-etal-2024-use">
<titleInfo>
<title>How to use Language Models for Synthetic Text Generation in Cerebrovascular Disease-specific Medical Reports</title>
</titleInfo>
<name type="personal">
<namePart type="given">Byoung-Doo</namePart>
<namePart type="family">Oh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gi-Youn</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chulho</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yu-Seop</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st Workshop on Personalization of Generative AI Systems (PERSONALIZE 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ameet</namePart>
<namePart type="family">Deshpande</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">EunJeong</namePart>
<namePart type="family">Hwang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vishvak</namePart>
<namePart type="family">Murahari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joon</namePart>
<namePart type="given">Sung</namePart>
<namePart type="family">Park</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Diyi</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ashish</namePart>
<namePart type="family">Sabharwal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Karthik</namePart>
<namePart type="family">Narasimhan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ashwin</namePart>
<namePart type="family">Kalyan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">St. Julians, Malta</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The quantity and quality of data have a significant impact on the performance of artificial intelligence (AI). However, in the biomedical domain, data often contains sensitive information such as personal details, making it challenging to secure enough data for medical AI. Consequently, there is a growing interest in synthetic data generation for medical AI. However, research has primarily focused on medical images, with little given to text-based data such as medical records. Therefore, this study explores the application of language models (LMs) for synthetic text generation in low-resource domains like medical records. It compares the results of synthetic text generation based on different LMs. To achieve this, we focused on two criteria for LM-based synthetic text generation of medical records using two keywords entered by the user: 1) the impact of the LM’s knowledge, 2) the impact of the LM’s size. Additionally, we objectively evaluated the generated synthetic text, including representative metrics such as BLUE and ROUGE, along with clinician’s evaluations.</abstract>
<identifier type="citekey">oh-etal-2024-use</identifier>
<location>
<url>https://aclanthology.org/2024.personalize-1.2</url>
</location>
<part>
<date>2024-03</date>
<extent unit="page">
<start>10</start>
<end>17</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T How to use Language Models for Synthetic Text Generation in Cerebrovascular Disease-specific Medical Reports
%A Oh, Byoung-Doo
%A Kim, Gi-Youn
%A Kim, Chulho
%A Kim, Yu-Seop
%Y Deshpande, Ameet
%Y Hwang, EunJeong
%Y Murahari, Vishvak
%Y Park, Joon Sung
%Y Yang, Diyi
%Y Sabharwal, Ashish
%Y Narasimhan, Karthik
%Y Kalyan, Ashwin
%S Proceedings of the 1st Workshop on Personalization of Generative AI Systems (PERSONALIZE 2024)
%D 2024
%8 March
%I Association for Computational Linguistics
%C St. Julians, Malta
%F oh-etal-2024-use
%X The quantity and quality of data have a significant impact on the performance of artificial intelligence (AI). However, in the biomedical domain, data often contains sensitive information such as personal details, making it challenging to secure enough data for medical AI. Consequently, there is a growing interest in synthetic data generation for medical AI. However, research has primarily focused on medical images, with little given to text-based data such as medical records. Therefore, this study explores the application of language models (LMs) for synthetic text generation in low-resource domains like medical records. It compares the results of synthetic text generation based on different LMs. To achieve this, we focused on two criteria for LM-based synthetic text generation of medical records using two keywords entered by the user: 1) the impact of the LM’s knowledge, 2) the impact of the LM’s size. Additionally, we objectively evaluated the generated synthetic text, including representative metrics such as BLUE and ROUGE, along with clinician’s evaluations.
%U https://aclanthology.org/2024.personalize-1.2
%P 10-17
Markdown (Informal)
[How to use Language Models for Synthetic Text Generation in Cerebrovascular Disease-specific Medical Reports](https://aclanthology.org/2024.personalize-1.2) (Oh et al., PERSONALIZE-WS 2024)
ACL