@inproceedings{khlaut-etal-2024-efficient,
title = "Efficient Medical Question Answering with Knowledge-Augmented Question Generation",
author = "Khlaut, Julien and
Dancette, Corentin and
Ferreres, Elodie and
Alaedine, Benani and
Herent, Herent and
Manceron, Pierre",
editor = "Naumann, Tristan and
Ben Abacha, Asma and
Bethard, Steven and
Roberts, Kirk and
Bitterman, Danielle",
booktitle = "Proceedings of the 6th Clinical Natural Language Processing Workshop",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.clinicalnlp-1.2/",
doi = "10.18653/v1/2024.clinicalnlp-1.2",
pages = "10--20",
abstract = "In the expanding field of language model applications, medical knowledge representation remains a significant challenge due to the specialized nature of the domain. Large language models, such as GPT-4, obtain reasonable scores on medical question-answering tasks, but smaller models are far behind.In this work, we introduce a method to improve the proficiency of a small language model in the medical domain by employing a two-fold approach. We first fine-tune the model on a corpus of medical textbooks. Then, we use GPT-4 to generate questions similar to the downstream task, prompted with textbook knowledge, and use them to fine-tune the model. Additionally, we introduce ECN-QA, a novel Medical QA dataset containing {\textquotedblleft}progressive questions{\textquotedblright} composed of related sequential questions. We show the benefits of our training strategy on this dataset.The study`s findings highlight the potential of small language models in the medical domain when appropriately fine-tuned."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="khlaut-etal-2024-efficient">
<titleInfo>
<title>Efficient Medical Question Answering with Knowledge-Augmented Question Generation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Julien</namePart>
<namePart type="family">Khlaut</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Corentin</namePart>
<namePart type="family">Dancette</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elodie</namePart>
<namePart type="family">Ferreres</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Benani</namePart>
<namePart type="family">Alaedine</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Herent</namePart>
<namePart type="family">Herent</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pierre</namePart>
<namePart type="family">Manceron</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 6th Clinical Natural Language Processing Workshop</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tristan</namePart>
<namePart type="family">Naumann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asma</namePart>
<namePart type="family">Ben Abacha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Bethard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kirk</namePart>
<namePart type="family">Roberts</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Danielle</namePart>
<namePart type="family">Bitterman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mexico City, Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In the expanding field of language model applications, medical knowledge representation remains a significant challenge due to the specialized nature of the domain. Large language models, such as GPT-4, obtain reasonable scores on medical question-answering tasks, but smaller models are far behind.In this work, we introduce a method to improve the proficiency of a small language model in the medical domain by employing a two-fold approach. We first fine-tune the model on a corpus of medical textbooks. Then, we use GPT-4 to generate questions similar to the downstream task, prompted with textbook knowledge, and use them to fine-tune the model. Additionally, we introduce ECN-QA, a novel Medical QA dataset containing “progressive questions” composed of related sequential questions. We show the benefits of our training strategy on this dataset.The study‘s findings highlight the potential of small language models in the medical domain when appropriately fine-tuned.</abstract>
<identifier type="citekey">khlaut-etal-2024-efficient</identifier>
<identifier type="doi">10.18653/v1/2024.clinicalnlp-1.2</identifier>
<location>
<url>https://aclanthology.org/2024.clinicalnlp-1.2/</url>
</location>
<part>
<date>2024-06</date>
<extent unit="page">
<start>10</start>
<end>20</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Efficient Medical Question Answering with Knowledge-Augmented Question Generation
%A Khlaut, Julien
%A Dancette, Corentin
%A Ferreres, Elodie
%A Alaedine, Benani
%A Herent, Herent
%A Manceron, Pierre
%Y Naumann, Tristan
%Y Ben Abacha, Asma
%Y Bethard, Steven
%Y Roberts, Kirk
%Y Bitterman, Danielle
%S Proceedings of the 6th Clinical Natural Language Processing Workshop
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F khlaut-etal-2024-efficient
%X In the expanding field of language model applications, medical knowledge representation remains a significant challenge due to the specialized nature of the domain. Large language models, such as GPT-4, obtain reasonable scores on medical question-answering tasks, but smaller models are far behind.In this work, we introduce a method to improve the proficiency of a small language model in the medical domain by employing a two-fold approach. We first fine-tune the model on a corpus of medical textbooks. Then, we use GPT-4 to generate questions similar to the downstream task, prompted with textbook knowledge, and use them to fine-tune the model. Additionally, we introduce ECN-QA, a novel Medical QA dataset containing “progressive questions” composed of related sequential questions. We show the benefits of our training strategy on this dataset.The study‘s findings highlight the potential of small language models in the medical domain when appropriately fine-tuned.
%R 10.18653/v1/2024.clinicalnlp-1.2
%U https://aclanthology.org/2024.clinicalnlp-1.2/
%U https://doi.org/10.18653/v1/2024.clinicalnlp-1.2
%P 10-20
Markdown (Informal)
[Efficient Medical Question Answering with Knowledge-Augmented Question Generation](https://aclanthology.org/2024.clinicalnlp-1.2/) (Khlaut et al., ClinicalNLP 2024)
ACL