@inproceedings{cai-etal-2026-dentalgpt,
title = "{D}ental{GPT}: Incentivizing Multimodal Reasoning in Dentistry",
author = "Cai, Zhenyang and
Zhang, Jiaming and
Zhao, Junjie and
Zeng, Ziyi and
Li, Yanchao and
Jingyi, Liang and
Chen, Junying and
Yang, Yunjin and
You, Jiajun and
Deng, Shuzhi and
Xieruiqiii and
Chen, Yuanting and
Feng, Xiangyi and
Li, Jianquan and
Chen, Liangyi and
Wang, Junwen and
Jiang, Shan and
Wang, Benyou",
editor = "Liakata, Maria and
Moreira, Viviane P. and
Zhang, Jiajun and
Jurgens, David",
booktitle = "Findings of the {A}ssociation for {C}omputational {L}inguistics: {ACL} 2026",
month = jul,
year = "2026",
address = "San Diego, California, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.findings-acl.135/",
pages = "2811--2829",
ISBN = "979-8-89176-395-1",
abstract = "Reliable interpretation of multimodal dental data is essential for automated oral healthcare, yet current multimodal large language models (MLLMs) show limited understanding of dental images. Although complex reasoning improves performance, its gains in dentistry are substantially smaller than in other medical domains, suggesting that complex reasoning is not yet sufficiently incentivized for dental diagnosis, likely due to insufficient domain knowledge and limited reinforcement learning on dental questions. We present DentalGPT, a dentistry-specialized MLLM trained via staged multimodal alignment and reinforcement learning. By constructing the largest annotated multimodal dental dataset to date with over 120k images, multimodal alignment provides the necessary domain knowledge foundation to support and incentivize complex reasoning, which is further strengthened through reinforcement learning. Experiments on expert-annotated benchmarks and dental subsets of medical VQA benchmarks show that DentalGPT achieves superior performance on disease classification and dental VQA tasks, outperforming many state-of-the-art MLLMs despite its compact 7B parameter scale."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="cai-etal-2026-dentalgpt">
<titleInfo>
<title>DentalGPT: Incentivizing Multimodal Reasoning in Dentistry</title>
</titleInfo>
<name type="personal">
<namePart type="given">Zhenyang</namePart>
<namePart type="family">Cai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiaming</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junjie</namePart>
<namePart type="family">Zhao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ziyi</namePart>
<namePart type="family">Zeng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yanchao</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Liang</namePart>
<namePart type="family">Jingyi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junying</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yunjin</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiajun</namePart>
<namePart type="family">You</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shuzhi</namePart>
<namePart type="family">Deng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name>
<namePart>Xieruiqiii</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuanting</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiangyi</namePart>
<namePart type="family">Feng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jianquan</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Liangyi</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junwen</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shan</namePart>
<namePart type="family">Jiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Benyou</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2026</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Liakata</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Viviane</namePart>
<namePart type="given">P</namePart>
<namePart type="family">Moreira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiajun</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Jurgens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-395-1</identifier>
</relatedItem>
<abstract>Reliable interpretation of multimodal dental data is essential for automated oral healthcare, yet current multimodal large language models (MLLMs) show limited understanding of dental images. Although complex reasoning improves performance, its gains in dentistry are substantially smaller than in other medical domains, suggesting that complex reasoning is not yet sufficiently incentivized for dental diagnosis, likely due to insufficient domain knowledge and limited reinforcement learning on dental questions. We present DentalGPT, a dentistry-specialized MLLM trained via staged multimodal alignment and reinforcement learning. By constructing the largest annotated multimodal dental dataset to date with over 120k images, multimodal alignment provides the necessary domain knowledge foundation to support and incentivize complex reasoning, which is further strengthened through reinforcement learning. Experiments on expert-annotated benchmarks and dental subsets of medical VQA benchmarks show that DentalGPT achieves superior performance on disease classification and dental VQA tasks, outperforming many state-of-the-art MLLMs despite its compact 7B parameter scale.</abstract>
<identifier type="citekey">cai-etal-2026-dentalgpt</identifier>
<location>
<url>https://aclanthology.org/2026.findings-acl.135/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>2811</start>
<end>2829</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T DentalGPT: Incentivizing Multimodal Reasoning in Dentistry
%A Cai, Zhenyang
%A Zhang, Jiaming
%A Zhao, Junjie
%A Zeng, Ziyi
%A Li, Yanchao
%A Jingyi, Liang
%A Chen, Junying
%A Yang, Yunjin
%A You, Jiajun
%A Deng, Shuzhi
%A Chen, Yuanting
%A Feng, Xiangyi
%A Li, Jianquan
%A Chen, Liangyi
%A Wang, Junwen
%A Jiang, Shan
%A Wang, Benyou
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%A Xieruiqiii
%S Findings of the Association for Computational Linguistics: ACL 2026
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-395-1
%F cai-etal-2026-dentalgpt
%X Reliable interpretation of multimodal dental data is essential for automated oral healthcare, yet current multimodal large language models (MLLMs) show limited understanding of dental images. Although complex reasoning improves performance, its gains in dentistry are substantially smaller than in other medical domains, suggesting that complex reasoning is not yet sufficiently incentivized for dental diagnosis, likely due to insufficient domain knowledge and limited reinforcement learning on dental questions. We present DentalGPT, a dentistry-specialized MLLM trained via staged multimodal alignment and reinforcement learning. By constructing the largest annotated multimodal dental dataset to date with over 120k images, multimodal alignment provides the necessary domain knowledge foundation to support and incentivize complex reasoning, which is further strengthened through reinforcement learning. Experiments on expert-annotated benchmarks and dental subsets of medical VQA benchmarks show that DentalGPT achieves superior performance on disease classification and dental VQA tasks, outperforming many state-of-the-art MLLMs despite its compact 7B parameter scale.
%U https://aclanthology.org/2026.findings-acl.135/
%P 2811-2829
Markdown (Informal)
[DentalGPT: Incentivizing Multimodal Reasoning in Dentistry](https://aclanthology.org/2026.findings-acl.135/) (Cai et al., Findings 2026)
ACL
- Zhenyang Cai, Jiaming Zhang, Junjie Zhao, Ziyi Zeng, Yanchao Li, Liang Jingyi, Junying Chen, Yunjin Yang, Jiajun You, Shuzhi Deng, Xieruiqiii, Yuanting Chen, Xiangyi Feng, Jianquan Li, Liangyi Chen, Junwen Wang, Shan Jiang, and Benyou Wang. 2026. DentalGPT: Incentivizing Multimodal Reasoning in Dentistry. In Findings of the Association for Computational Linguistics: ACL 2026, pages 2811–2829, San Diego, California, United States. Association for Computational Linguistics.