@inproceedings{rodriguez-etal-2025-comparative,
title = "Comparative Analysis of Human and Large Language Model Performance in Pharmacology Multiple-Choice Questions",
author = "Rodriguez, Ricardo and
Huet, St{\'e}phane and
Favre, Benoit and
Rouvier, Mickael",
editor = "Angelova, Galia and
Kunilovskaya, Maria and
Escribe, Marie and
Mitkov, Ruslan",
booktitle = "Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era",
month = sep,
year = "2025",
address = "Varna, Bulgaria",
publisher = "INCOMA Ltd., Shoumen, Bulgaria",
url = "https://aclanthology.org/2025.ranlp-1.117/",
pages = "1022--1029",
abstract = "In this article, we study the answers generated by a selection of Large Language Models to a set of Multiple Choice Questions in Pharmacology, and compare them to the answers provided by students, to understand which questions in this clinical domain are difficult for the models when compared to humans and why. We extract the internal logits to infer probability distributions and analyse the main features that determine the difficulty of questions using statistical methods. We also provide an extension to the FrenchMedMCQA dataset, with pairs of question-answers in pharmacology, enriched with student response rate, answer scoring, clinical topics, and annotations on question structure and semantics."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="rodriguez-etal-2025-comparative">
<titleInfo>
<title>Comparative Analysis of Human and Large Language Model Performance in Pharmacology Multiple-Choice Questions</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ricardo</namePart>
<namePart type="family">Rodriguez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stéphane</namePart>
<namePart type="family">Huet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Benoit</namePart>
<namePart type="family">Favre</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mickael</namePart>
<namePart type="family">Rouvier</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era</title>
</titleInfo>
<name type="personal">
<namePart type="given">Galia</namePart>
<namePart type="family">Angelova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Kunilovskaya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marie</namePart>
<namePart type="family">Escribe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd., Shoumen, Bulgaria</publisher>
<place>
<placeTerm type="text">Varna, Bulgaria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this article, we study the answers generated by a selection of Large Language Models to a set of Multiple Choice Questions in Pharmacology, and compare them to the answers provided by students, to understand which questions in this clinical domain are difficult for the models when compared to humans and why. We extract the internal logits to infer probability distributions and analyse the main features that determine the difficulty of questions using statistical methods. We also provide an extension to the FrenchMedMCQA dataset, with pairs of question-answers in pharmacology, enriched with student response rate, answer scoring, clinical topics, and annotations on question structure and semantics.</abstract>
<identifier type="citekey">rodriguez-etal-2025-comparative</identifier>
<location>
<url>https://aclanthology.org/2025.ranlp-1.117/</url>
</location>
<part>
<date>2025-09</date>
<extent unit="page">
<start>1022</start>
<end>1029</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Comparative Analysis of Human and Large Language Model Performance in Pharmacology Multiple-Choice Questions
%A Rodriguez, Ricardo
%A Huet, Stéphane
%A Favre, Benoit
%A Rouvier, Mickael
%Y Angelova, Galia
%Y Kunilovskaya, Maria
%Y Escribe, Marie
%Y Mitkov, Ruslan
%S Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era
%D 2025
%8 September
%I INCOMA Ltd., Shoumen, Bulgaria
%C Varna, Bulgaria
%F rodriguez-etal-2025-comparative
%X In this article, we study the answers generated by a selection of Large Language Models to a set of Multiple Choice Questions in Pharmacology, and compare them to the answers provided by students, to understand which questions in this clinical domain are difficult for the models when compared to humans and why. We extract the internal logits to infer probability distributions and analyse the main features that determine the difficulty of questions using statistical methods. We also provide an extension to the FrenchMedMCQA dataset, with pairs of question-answers in pharmacology, enriched with student response rate, answer scoring, clinical topics, and annotations on question structure and semantics.
%U https://aclanthology.org/2025.ranlp-1.117/
%P 1022-1029
Markdown (Informal)
[Comparative Analysis of Human and Large Language Model Performance in Pharmacology Multiple-Choice Questions](https://aclanthology.org/2025.ranlp-1.117/) (Rodriguez et al., RANLP 2025)
ACL