@inproceedings{seo-etal-2026-evaluating,
title = "Evaluating the Pre-Consultation Ability of {LLM}s using Diagnostic Guidelines",
author = "Seo, Jean and
Kim, Gibaeg and
Shin, Kihun and
Lim, Seungseop and
Lee, Hyunkyung and
Han, Wooseok and
Lee, Jongwon and
Yang, Eunho",
editor = {Matusevych, Yevgen and
Eryi{\u{g}}it, G{\"u}l{\c{s}}en and
Aletras, Nikolaos},
booktitle = "Proceedings of the 19th Conference of the {E}uropean Chapter of the {A}ssociation for {C}omputational {L}inguistics (Volume 5: Industry Track)",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.eacl-industry.6/",
pages = "78--94",
ISBN = "979-8-89176-384-5",
abstract = "We introduce EPAG, a benchmark dataset and framework designed for evaluating the pre-consultation ability of LLMs using diagnostic guidelines. LLMs are evaluated directly through HPI-diagnostic guideline comparison and indirectly through disease diagnosis. In our experiments, we observe that small open-source models fine-tuned with a well-curated, task-specific dataset can outperform frontier LLMs in pre-consultation. Additionally, we find that increased amount of HPI (History of Present Illness) does not necessarily lead to improved diagnostic performance. Further experiments reveal that the language of pre-consultation influences the characteristics of the dialogue. By open-sourcing our dataset and evaluation pipeline on https://github.com/seemdog/EPAG, we aim to contribute to the evaluation and further development of LLM applications in real-world clinical settings."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="seo-etal-2026-evaluating">
<titleInfo>
<title>Evaluating the Pre-Consultation Ability of LLMs using Diagnostic Guidelines</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jean</namePart>
<namePart type="family">Seo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gibaeg</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kihun</namePart>
<namePart type="family">Shin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Seungseop</namePart>
<namePart type="family">Lim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hyunkyung</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wooseok</namePart>
<namePart type="family">Han</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jongwon</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eunho</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 5: Industry Track)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yevgen</namePart>
<namePart type="family">Matusevych</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gülşen</namePart>
<namePart type="family">Eryiğit</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nikolaos</namePart>
<namePart type="family">Aletras</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-384-5</identifier>
</relatedItem>
<abstract>We introduce EPAG, a benchmark dataset and framework designed for evaluating the pre-consultation ability of LLMs using diagnostic guidelines. LLMs are evaluated directly through HPI-diagnostic guideline comparison and indirectly through disease diagnosis. In our experiments, we observe that small open-source models fine-tuned with a well-curated, task-specific dataset can outperform frontier LLMs in pre-consultation. Additionally, we find that increased amount of HPI (History of Present Illness) does not necessarily lead to improved diagnostic performance. Further experiments reveal that the language of pre-consultation influences the characteristics of the dialogue. By open-sourcing our dataset and evaluation pipeline on https://github.com/seemdog/EPAG, we aim to contribute to the evaluation and further development of LLM applications in real-world clinical settings.</abstract>
<identifier type="citekey">seo-etal-2026-evaluating</identifier>
<location>
<url>https://aclanthology.org/2026.eacl-industry.6/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>78</start>
<end>94</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Evaluating the Pre-Consultation Ability of LLMs using Diagnostic Guidelines
%A Seo, Jean
%A Kim, Gibaeg
%A Shin, Kihun
%A Lim, Seungseop
%A Lee, Hyunkyung
%A Han, Wooseok
%A Lee, Jongwon
%A Yang, Eunho
%Y Matusevych, Yevgen
%Y Eryiğit, Gülşen
%Y Aletras, Nikolaos
%S Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 5: Industry Track)
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-384-5
%F seo-etal-2026-evaluating
%X We introduce EPAG, a benchmark dataset and framework designed for evaluating the pre-consultation ability of LLMs using diagnostic guidelines. LLMs are evaluated directly through HPI-diagnostic guideline comparison and indirectly through disease diagnosis. In our experiments, we observe that small open-source models fine-tuned with a well-curated, task-specific dataset can outperform frontier LLMs in pre-consultation. Additionally, we find that increased amount of HPI (History of Present Illness) does not necessarily lead to improved diagnostic performance. Further experiments reveal that the language of pre-consultation influences the characteristics of the dialogue. By open-sourcing our dataset and evaluation pipeline on https://github.com/seemdog/EPAG, we aim to contribute to the evaluation and further development of LLM applications in real-world clinical settings.
%U https://aclanthology.org/2026.eacl-industry.6/
%P 78-94
Markdown (Informal)
[Evaluating the Pre-Consultation Ability of LLMs using Diagnostic Guidelines](https://aclanthology.org/2026.eacl-industry.6/) (Seo et al., EACL 2026)
ACL
- Jean Seo, Gibaeg Kim, Kihun Shin, Seungseop Lim, Hyunkyung Lee, Wooseok Han, Jongwon Lee, and Eunho Yang. 2026. Evaluating the Pre-Consultation Ability of LLMs using Diagnostic Guidelines. In Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 5: Industry Track), pages 78–94, Rabat, Morocco. Association for Computational Linguistics.