@inproceedings{nair-etal-2024-dera,
title = "{DERA}: Enhancing Large Language Model Completions with Dialog-Enabled Resolving Agents",
author = "Nair, Varun and
Schumacher, Elliot and
Tso, Geoffrey and
Kannan, Anitha",
editor = "Naumann, Tristan and
Ben Abacha, Asma and
Bethard, Steven and
Roberts, Kirk and
Bitterman, Danielle",
booktitle = "Proceedings of the 6th Clinical Natural Language Processing Workshop",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.clinicalnlp-1.12",
doi = "10.18653/v1/2024.clinicalnlp-1.12",
pages = "122--161",
abstract = "Large language models (LLMs) have emerged as valuable tools for many natural language understanding tasks. In safety-critical applications such as healthcare, the utility of these models is governed by their ability to generate factually accurate and complete outputs. In this work, we present dialog-enabled resolving agents (DERA). DERA is a paradigm made possible by the increased conversational abilities of LLMs. It provides a simple, interpretable forum for models to communicate feedback and iteratively improve output. We frame our dialog as a discussion between two agent types {--} a Researcher, who processes information and identifies crucial problem components, and a Decider, who has the autonomy to integrate the Researcher{'}s information and makes judgments on the final output.We test DERA against three clinically-focused tasks, with GPT-4 serving as our LLM. DERA shows significant improvement over the base GPT-4 performance in both human expert preference evaluations and quantitative metrics for medical conversation summarization and care plan generation. In a new finding, we also show that GPT-4{'}s performance (70{\%}) on an open-ended version of the MedQA question-answering (QA) dataset (Jin 2021; USMLE) is well above the passing level (60{\%}), with DERA showing similar performance. We will release the open-ended MedQA dataset.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="nair-etal-2024-dera">
<titleInfo>
<title>DERA: Enhancing Large Language Model Completions with Dialog-Enabled Resolving Agents</title>
</titleInfo>
<name type="personal">
<namePart type="given">Varun</namePart>
<namePart type="family">Nair</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elliot</namePart>
<namePart type="family">Schumacher</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Geoffrey</namePart>
<namePart type="family">Tso</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anitha</namePart>
<namePart type="family">Kannan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 6th Clinical Natural Language Processing Workshop</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tristan</namePart>
<namePart type="family">Naumann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asma</namePart>
<namePart type="family">Ben Abacha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Bethard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kirk</namePart>
<namePart type="family">Roberts</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Danielle</namePart>
<namePart type="family">Bitterman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mexico City, Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
    <abstract>Large language models (LLMs) have emerged as valuable tools for many natural language understanding tasks. In safety-critical applications such as healthcare, the utility of these models is governed by their ability to generate factually accurate and complete outputs. In this work, we present dialog-enabled resolving agents (DERA). DERA is a paradigm made possible by the increased conversational abilities of LLMs. It provides a simple, interpretable forum for models to communicate feedback and iteratively improve output. We frame our dialog as a discussion between two agent types – a Researcher, who processes information and identifies crucial problem components, and a Decider, who has the autonomy to integrate the Researcher’s information and makes judgments on the final output. We test DERA against three clinically-focused tasks, with GPT-4 serving as our LLM. DERA shows significant improvement over the base GPT-4 performance in both human expert preference evaluations and quantitative metrics for medical conversation summarization and care plan generation. In a new finding, we also show that GPT-4’s performance (70%) on an open-ended version of the MedQA question-answering (QA) dataset (Jin 2021; USMLE) is well above the passing level (60%), with DERA showing similar performance. We will release the open-ended MedQA dataset.</abstract>
<identifier type="citekey">nair-etal-2024-dera</identifier>
<identifier type="doi">10.18653/v1/2024.clinicalnlp-1.12</identifier>
<location>
<url>https://aclanthology.org/2024.clinicalnlp-1.12</url>
</location>
<part>
<date>2024-06</date>
<extent unit="page">
<start>122</start>
<end>161</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T DERA: Enhancing Large Language Model Completions with Dialog-Enabled Resolving Agents
%A Nair, Varun
%A Schumacher, Elliot
%A Tso, Geoffrey
%A Kannan, Anitha
%Y Naumann, Tristan
%Y Ben Abacha, Asma
%Y Bethard, Steven
%Y Roberts, Kirk
%Y Bitterman, Danielle
%S Proceedings of the 6th Clinical Natural Language Processing Workshop
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F nair-etal-2024-dera
%X Large language models (LLMs) have emerged as valuable tools for many natural language understanding tasks. In safety-critical applications such as healthcare, the utility of these models is governed by their ability to generate factually accurate and complete outputs. In this work, we present dialog-enabled resolving agents (DERA). DERA is a paradigm made possible by the increased conversational abilities of LLMs. It provides a simple, interpretable forum for models to communicate feedback and iteratively improve output. We frame our dialog as a discussion between two agent types – a Researcher, who processes information and identifies crucial problem components, and a Decider, who has the autonomy to integrate the Researcher’s information and makes judgments on the final output. We test DERA against three clinically-focused tasks, with GPT-4 serving as our LLM. DERA shows significant improvement over the base GPT-4 performance in both human expert preference evaluations and quantitative metrics for medical conversation summarization and care plan generation. In a new finding, we also show that GPT-4’s performance (70%) on an open-ended version of the MedQA question-answering (QA) dataset (Jin 2021; USMLE) is well above the passing level (60%), with DERA showing similar performance. We will release the open-ended MedQA dataset.
%R 10.18653/v1/2024.clinicalnlp-1.12
%U https://aclanthology.org/2024.clinicalnlp-1.12
%U https://doi.org/10.18653/v1/2024.clinicalnlp-1.12
%P 122-161
Markdown (Informal)
[DERA: Enhancing Large Language Model Completions with Dialog-Enabled Resolving Agents](https://aclanthology.org/2024.clinicalnlp-1.12) (Nair et al., ClinicalNLP-WS 2024)
ACL
Varun Nair, Elliot Schumacher, Geoffrey Tso, and Anitha Kannan. 2024. DERA: Enhancing Large Language Model Completions with Dialog-Enabled Resolving Agents. In Proceedings of the 6th Clinical Natural Language Processing Workshop, pages 122–161, Mexico City, Mexico. Association for Computational Linguistics.
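
The abstract above describes DERA as a dialog between two agent roles: a Researcher that surfaces problem components and a Decider that integrates the feedback and owns the final output. As a rough illustration only, here is a minimal, hypothetical Python sketch of such a loop; the `complete` callable, the prompts, and the stopping rule are assumptions for exposition, not the authors' released implementation.

```python
# Hypothetical Researcher/Decider loop in the spirit of the DERA abstract above.
# `complete` stands in for any LLM chat-completion call; prompts, names, and the
# stopping rule are illustrative assumptions, not the paper's released code.
from typing import Callable

Complete = Callable[[str], str]  # prompt -> model completion


def dera_dialog(task: str, draft: str, complete: Complete, max_turns: int = 3) -> str:
    """Iteratively refine `draft` through a Researcher/Decider exchange."""
    output = draft
    for _ in range(max_turns):
        # Researcher: processes the task and flags missing or incorrect components.
        feedback = complete(
            f"You are a Researcher. Task:\n{task}\n\nCurrent output:\n{output}\n\n"
            "List any missing, incorrect, or unsupported points. Reply DONE if none."
        )
        if "DONE" in feedback:
            break
        # Decider: has the final say; integrates only the feedback it judges valid.
        output = complete(
            f"You are a Decider. Task:\n{task}\n\nCurrent output:\n{output}\n\n"
            f"Researcher feedback:\n{feedback}\n\n"
            "Revise the output, changing only what the feedback justifies."
        )
    return output


if __name__ == "__main__":
    # Stub completion so the loop runs end to end without any API.
    stub = lambda prompt: "DONE"
    print(dera_dialog("Summarize the visit.", "Patient reports a mild headache.", stub))
```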