@inproceedings{oukelmoun-etal-2025-detecting,
title = "Detecting Omissions in {LLM}-Generated Medical Summaries",
author = {Oukelmoun, Achir and
Semmar, Nasredine and
de Chalendar, Ga{\"e}l and
Cormi, Clement and
Oukelmoun, Mariame and
Vibert, Eric and
Allard, Marc-Antoine},
editor = "Potdar, Saloni and
Rojas-Barahona, Lina and
Montella, Sebastien",
booktitle = "Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: Industry Track",
month = nov,
year = "2025",
address = "Suzhou (China)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.emnlp-industry.22/",
doi = "10.18653/v1/2025.emnlp-industry.22",
pages = "325--337",
ISBN = "979-8-89176-333-3",
abstract = "With the emergence of Large Language Models (LLMs), numerous use cases have arisen in the medical field, particularly in generating summaries for consultation transcriptions and extensive medical reports. A major concern is that these summaries may omit critical information from the original input, potentially jeopardizing the decision-making process. This issue of omission is distinct from hallucination, which involves generating incorrect or fabricated facts. To address omissions, this paper introduces a dataset designed to evaluate such issues and proposes a frugal approach called EmbedKDECheck for detecting omissions in LLM-generated texts. The dataset, created in French, has been validated by medical experts to ensure it accurately represents real-world scenarios in the medical field. The objective is to develop a reference-free (black-box) method that can evaluate the reliability of summaries or reports without requiring significant computational resources, relying only on input and output. Unlike methods that rely on embeddings derived from the LLM itself, our approach uses embeddings generated by a third-party, lightweight NLP model based on a combination of FastText and Word2Vec. These embeddings are then combined with anomaly detection models to identify omissions effectively, making the method well-suited for resource-constrained environments. EmbedKDECheck was benchmarked against black-box state-of-the-art frameworks and models, including SelfCheckGPT, ChainPoll, and G-Eval, which leverage GPT. Results demonstrated its satisfactory performance in detecting omissions in LLM-generated summaries. This work advances frugal methodologies for evaluating the reliability of LLM-generated texts, with significant potential to improve the safety and accuracy of medical decision support systems in surgery and other healthcare domains."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="oukelmoun-etal-2025-detecting">
<titleInfo>
<title>Detecting Omissions in LLM-Generated Medical Summaries</title>
</titleInfo>
<name type="personal">
<namePart type="given">Achir</namePart>
<namePart type="family">Oukelmoun</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nasredine</namePart>
<namePart type="family">Semmar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gaël</namePart>
<namePart type="family">de Chalendar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Clement</namePart>
<namePart type="family">Cormi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mariame</namePart>
<namePart type="family">Oukelmoun</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eric</namePart>
<namePart type="family">Vibert</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marc-Antoine</namePart>
<namePart type="family">Allard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: Industry Track</title>
</titleInfo>
<name type="personal">
<namePart type="given">Saloni</namePart>
<namePart type="family">Potdar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lina</namePart>
<namePart type="family">Rojas-Barahona</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastien</namePart>
<namePart type="family">Montella</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou (China)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-333-3</identifier>
</relatedItem>
<abstract>With the emergence of Large Language Models (LLMs), numerous use cases have arisen in the medical field, particularly in generating summaries for consultation transcriptions and extensive medical reports. A major concern is that these summaries may omit critical information from the original input, potentially jeopardizing the decision-making process. This issue of omission is distinct from hallucination, which involves generating incorrect or fabricated facts. To address omissions, this paper introduces a dataset designed to evaluate such issues and proposes a frugal approach called EmbedKDECheck for detecting omissions in LLM-generated texts. The dataset, created in French, has been validated by medical experts to ensure it accurately represents real-world scenarios in the medical field. The objective is to develop a reference-free (black-box) method that can evaluate the reliability of summaries or reports without requiring significant computational resources, relying only on input and output. Unlike methods that rely on embeddings derived from the LLM itself, our approach uses embeddings generated by a third-party, lightweight NLP model based on a combination of FastText and Word2Vec. These embeddings are then combined with anomaly detection models to identify omissions effectively, making the method well-suited for resource-constrained environments. EmbedKDECheck was benchmarked against black-box state-of-the-art frameworks and models, including SelfCheckGPT, ChainPoll, and G-Eval, which leverage GPT. Results demonstrated its satisfactory performance in detecting omissions in LLM-generated summaries. This work advances frugal methodologies for evaluating the reliability of LLM-generated texts, with significant potential to improve the safety and accuracy of medical decision support systems in surgery and other healthcare domains.</abstract>
<identifier type="citekey">oukelmoun-etal-2025-detecting</identifier>
<identifier type="doi">10.18653/v1/2025.emnlp-industry.22</identifier>
<location>
<url>https://aclanthology.org/2025.emnlp-industry.22/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>325</start>
<end>337</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Detecting Omissions in LLM-Generated Medical Summaries
%A Oukelmoun, Achir
%A Semmar, Nasredine
%A de Chalendar, Gaël
%A Cormi, Clement
%A Oukelmoun, Mariame
%A Vibert, Eric
%A Allard, Marc-Antoine
%Y Potdar, Saloni
%Y Rojas-Barahona, Lina
%Y Montella, Sebastien
%S Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: Industry Track
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou (China)
%@ 979-8-89176-333-3
%F oukelmoun-etal-2025-detecting
%X With the emergence of Large Language Models (LLMs), numerous use cases have arisen in the medical field, particularly in generating summaries for consultation transcriptions and extensive medical reports. A major concern is that these summaries may omit critical information from the original input, potentially jeopardizing the decision-making process. This issue of omission is distinct from hallucination, which involves generating incorrect or fabricated facts. To address omissions, this paper introduces a dataset designed to evaluate such issues and proposes a frugal approach called EmbedKDECheck for detecting omissions in LLM-generated texts. The dataset, created in French, has been validated by medical experts to ensure it accurately represents real-world scenarios in the medical field. The objective is to develop a reference-free (black-box) method that can evaluate the reliability of summaries or reports without requiring significant computational resources, relying only on input and output. Unlike methods that rely on embeddings derived from the LLM itself, our approach uses embeddings generated by a third-party, lightweight NLP model based on a combination of FastText and Word2Vec. These embeddings are then combined with anomaly detection models to identify omissions effectively, making the method well-suited for resource-constrained environments. EmbedKDECheck was benchmarked against black-box state-of-the-art frameworks and models, including SelfCheckGPT, ChainPoll, and G-Eval, which leverage GPT. Results demonstrated its satisfactory performance in detecting omissions in LLM-generated summaries. This work advances frugal methodologies for evaluating the reliability of LLM-generated texts, with significant potential to improve the safety and accuracy of medical decision support systems in surgery and other healthcare domains.
%R 10.18653/v1/2025.emnlp-industry.22
%U https://aclanthology.org/2025.emnlp-industry.22/
%U https://doi.org/10.18653/v1/2025.emnlp-industry.22
%P 325-337
Markdown (Informal)
[Detecting Omissions in LLM-Generated Medical Summaries](https://aclanthology.org/2025.emnlp-industry.22/) (Oukelmoun et al., EMNLP 2025)
ACL
Achir Oukelmoun, Nasredine Semmar, Gaël de Chalendar, Clement Cormi, Mariame Oukelmoun, Eric Vibert, and Marc-Antoine Allard. 2025. Detecting Omissions in LLM-Generated Medical Summaries. In Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: Industry Track, pages 325–337, Suzhou (China). Association for Computational Linguistics.
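
The abstract describes EmbedKDECheck only at a high level: sentence embeddings from a lightweight FastText/Word2Vec model are combined with anomaly detection to flag source content that a summary fails to cover. The sketch below is one plausible reading of that pipeline, not the authors' implementation. It mean-pools word vectors from a small Word2Vec model trained on the fly (a stand-in for the paper's pretrained FastText/Word2Vec combination), fits a Gaussian kernel density estimate on the summary's sentence embeddings, and flags source sentences that fall in low-density regions of that space. The function names, bandwidth, and quantile threshold are all illustrative assumptions.

```python
# Minimal sketch of an EmbedKDECheck-style omission detector.
# Assumptions (not from the paper): sentence vectors are mean-pooled word
# embeddings from a tiny Word2Vec model trained on the fly; the anomaly
# detector is a Gaussian KDE fitted on the summary's sentence embeddings.
# Source sentences with low log-density under that KDE are treated as
# candidate omissions. Bandwidth and threshold are illustrative only.

import numpy as np
from gensim.models import Word2Vec
from sklearn.neighbors import KernelDensity


def sent_vec(model, tokens):
    """Mean-pool word vectors; zero vector if no token is in vocabulary."""
    vecs = [model.wv[t] for t in tokens if t in model.wv]
    return np.mean(vecs, axis=0) if vecs else np.zeros(model.vector_size)


def flag_omissions(source_sents, summary_sents, bandwidth=0.5, quantile=0.1):
    # Train a throwaway Word2Vec model on both texts (stand-in for the
    # pretrained FastText/Word2Vec embeddings described in the abstract).
    corpus = [s.lower().split() for s in source_sents + summary_sents]
    model = Word2Vec(corpus, vector_size=50, min_count=1, epochs=50, seed=0)

    src = np.stack([sent_vec(model, s.lower().split()) for s in source_sents])
    summ = np.stack([sent_vec(model, s.lower().split()) for s in summary_sents])

    # Fit the KDE on the summary's embedding space; source sentences that
    # look anomalous with respect to it are poorly covered by the summary.
    kde = KernelDensity(kernel="gaussian", bandwidth=bandwidth).fit(summ)
    scores = kde.score_samples(src)         # log-density per source sentence
    cutoff = np.quantile(scores, quantile)  # illustrative decision threshold
    return [(s, float(sc)) for s, sc in zip(source_sents, scores) if sc <= cutoff]


if __name__ == "__main__":
    source = [
        "The patient reports abdominal pain.",
        "An allergy to penicillin is documented.",
        "Surgery is scheduled for next month.",
    ]
    summary = [
        "The patient reports abdominal pain.",
        "Surgery is scheduled for next month.",
    ]
    for sent, score in flag_omissions(source, summary):
        print(f"possible omission (log-density {score:.2f}): {sent}")
```

Fitting the density on the summary side and scoring the source side matches the reference-free, black-box constraint stated in the abstract: the method sees only the input and output texts, never the summarizer's internals. With realistic pretrained embeddings one would expect the uncovered sentence (here, the allergy note) to score lowest; with a toy model trained on five sentences the scores are noisy, so the example is a shape-of-the-pipeline demonstration rather than a claim about accuracy.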