@inproceedings{shaib-etal-2023-summarizing,
title = "Summarizing, Simplifying, and Synthesizing Medical Evidence using {GPT}-3 (with Varying Success)",
author = "Shaib, Chantal and
Li, Millicent and
Joseph, Sebastian and
Marshall, Iain and
Li, Junyi Jessy and
Wallace, Byron",
editor = "Rogers, Anna and
Boyd-Graber, Jordan and
Okazaki, Naoaki",
booktitle = "Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.acl-short.119",
doi = "10.18653/v1/2023.acl-short.119",
pages = "1387--1407",
abstract = "Large language models, particularly GPT-3, are able to produce high quality summaries ofgeneral domain news articles in few- and zero-shot settings. However, it is unclear if such models are similarly capable in more specialized domains such as biomedicine. In this paper we enlist domain experts (individuals with medical training) to evaluate summaries of biomedical articles generated by GPT-3, given no supervision. We consider bothsingle- and multi-document settings. In the former, GPT-3 is tasked with generating regular and plain-language summaries of articles describing randomized controlled trials; in thelatter, we assess the degree to which GPT-3 is able to synthesize evidence reported acrossa collection of articles. We design an annotation scheme for evaluating model outputs, withan emphasis on assessing the factual accuracy of generated summaries. We find that whileGPT-3 is able to summarize and simplify single biomedical articles faithfully, it strugglesto provide accurate aggregations of findings over multiple documents. We release all data,code, and annotations used in this work.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="shaib-etal-2023-summarizing">
<titleInfo>
<title>Summarizing, Simplifying, and Synthesizing Medical Evidence using GPT-3 (with Varying Success)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Chantal</namePart>
<namePart type="family">Shaib</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Millicent</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastian</namePart>
<namePart type="family">Joseph</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Iain</namePart>
<namePart type="family">Marshall</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junyi</namePart>
<namePart type="given">Jessy</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Byron</namePart>
<namePart type="family">Wallace</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Rogers</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jordan</namePart>
<namePart type="family">Boyd-Graber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Naoaki</namePart>
<namePart type="family">Okazaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Large language models, particularly GPT-3, are able to produce high quality summaries ofgeneral domain news articles in few- and zero-shot settings. However, it is unclear if such models are similarly capable in more specialized domains such as biomedicine. In this paper we enlist domain experts (individuals with medical training) to evaluate summaries of biomedical articles generated by GPT-3, given no supervision. We consider bothsingle- and multi-document settings. In the former, GPT-3 is tasked with generating regular and plain-language summaries of articles describing randomized controlled trials; in thelatter, we assess the degree to which GPT-3 is able to synthesize evidence reported acrossa collection of articles. We design an annotation scheme for evaluating model outputs, withan emphasis on assessing the factual accuracy of generated summaries. We find that whileGPT-3 is able to summarize and simplify single biomedical articles faithfully, it strugglesto provide accurate aggregations of findings over multiple documents. We release all data,code, and annotations used in this work.</abstract>
<identifier type="citekey">shaib-etal-2023-summarizing</identifier>
<identifier type="doi">10.18653/v1/2023.acl-short.119</identifier>
<location>
<url>https://aclanthology.org/2023.acl-short.119</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>1387</start>
<end>1407</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Summarizing, Simplifying, and Synthesizing Medical Evidence using GPT-3 (with Varying Success)
%A Shaib, Chantal
%A Li, Millicent
%A Joseph, Sebastian
%A Marshall, Iain
%A Li, Junyi Jessy
%A Wallace, Byron
%Y Rogers, Anna
%Y Boyd-Graber, Jordan
%Y Okazaki, Naoaki
%S Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F shaib-etal-2023-summarizing
%X Large language models, particularly GPT-3, are able to produce high quality summaries ofgeneral domain news articles in few- and zero-shot settings. However, it is unclear if such models are similarly capable in more specialized domains such as biomedicine. In this paper we enlist domain experts (individuals with medical training) to evaluate summaries of biomedical articles generated by GPT-3, given no supervision. We consider bothsingle- and multi-document settings. In the former, GPT-3 is tasked with generating regular and plain-language summaries of articles describing randomized controlled trials; in thelatter, we assess the degree to which GPT-3 is able to synthesize evidence reported acrossa collection of articles. We design an annotation scheme for evaluating model outputs, withan emphasis on assessing the factual accuracy of generated summaries. We find that whileGPT-3 is able to summarize and simplify single biomedical articles faithfully, it strugglesto provide accurate aggregations of findings over multiple documents. We release all data,code, and annotations used in this work.
%R 10.18653/v1/2023.acl-short.119
%U https://aclanthology.org/2023.acl-short.119
%U https://doi.org/10.18653/v1/2023.acl-short.119
%P 1387-1407
Markdown (Informal)
[Summarizing, Simplifying, and Synthesizing Medical Evidence using GPT-3 (with Varying Success)](https://aclanthology.org/2023.acl-short.119) (Shaib et al., ACL 2023)
ACL
- Chantal Shaib, Millicent Li, Sebastian Joseph, Iain Marshall, Junyi Jessy Li, and Byron Wallace. 2023. Summarizing, Simplifying, and Synthesizing Medical Evidence using GPT-3 (with Varying Success). In Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers), pages 1387–1407, Toronto, Canada. Association for Computational Linguistics.