@inproceedings{kim-etal-2025-learning-negative,
title = "Learning from Negative Samples in Biomedical Generative Entity Linking",
author = "Kim, Chanhwi and
Kim, Hyunjae and
Park, Sihyeon and
Lee, Jiwoo and
Sung, Mujeen and
Kang, Jaewoo",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2025",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-acl.558/",
doi = "10.18653/v1/2025.findings-acl.558",
pages = "10714--10730",
ISBN = "979-8-89176-256-5",
abstract = "Generative models have become widely used in biomedical entity linking (BioEL) due to their excellent performance and efficient memory usage. However, these models are usually trained only with positive samples{---}entities that match the input mention{'}s identifier{---}and do not explicitly learn from hard negative samples, which are entities that look similar but have different meanings. To address this limitation, we introduce ANGEL (Learning from Negative Samples in Biomedical Generative Entity Linking), the first framework that trains generative BioEL models using negative samples. Specifically, a generative model is initially trained to generate positive entity names from the knowledge base for given input entities. Subsequently, both correct and incorrect outputs are gathered from the model{'}s top-k predictions. Finally, the model is updated to prioritize the correct predictions through preference optimization. Our models fine-tuned with ANGEL outperform the previous best baseline models by up to an average top-1 accuracy of 1.4{\%} on five benchmarks. When incorporating our framework into pre-training, the performance improvement increases further to 1.7{\%}, demonstrating its effectiveness in both the pre-training and fine-tuning stages. The code and model weights are available at https://github.com/dmis-lab/ANGEL."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kim-etal-2025-learning-negative">
<titleInfo>
<title>Learning from Negative Samples in Biomedical Generative Entity Linking</title>
</titleInfo>
<name type="personal">
<namePart type="given">Chanhwi</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hyunjae</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sihyeon</namePart>
<namePart type="family">Park</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiwoo</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mujeen</namePart>
<namePart type="family">Sung</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jaewoo</namePart>
<namePart type="family">Kang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2025</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Nabende</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Taher</namePart>
<namePart type="family">Pilehvar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-256-5</identifier>
</relatedItem>
<abstract>Generative models have become widely used in biomedical entity linking (BioEL) due to their excellent performance and efficient memory usage. However, these models are usually trained only with positive samples—entities that match the input mention’s identifier—and do not explicitly learn from hard negative samples, which are entities that look similar but have different meanings. To address this limitation, we introduce ANGEL (Learning from Negative Samples in Biomedical Generative Entity Linking), the first framework that trains generative BioEL models using negative samples. Specifically, a generative model is initially trained to generate positive entity names from the knowledge base for given input entities. Subsequently, both correct and incorrect outputs are gathered from the model’s top-k predictions. Finally, the model is updated to prioritize the correct predictions through preference optimization. Our models fine-tuned with ANGEL outperform the previous best baseline models by up to an average top-1 accuracy of 1.4% on five benchmarks. When incorporating our framework into pre-training, the performance improvement increases further to 1.7%, demonstrating its effectiveness in both the pre-training and fine-tuning stages. The code and model weights are available at https://github.com/dmis-lab/ANGEL.</abstract>
<identifier type="citekey">kim-etal-2025-learning-negative</identifier>
<identifier type="doi">10.18653/v1/2025.findings-acl.558</identifier>
<location>
<url>https://aclanthology.org/2025.findings-acl.558/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>10714</start>
<end>10730</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Learning from Negative Samples in Biomedical Generative Entity Linking
%A Kim, Chanhwi
%A Kim, Hyunjae
%A Park, Sihyeon
%A Lee, Jiwoo
%A Sung, Mujeen
%A Kang, Jaewoo
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Findings of the Association for Computational Linguistics: ACL 2025
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-256-5
%F kim-etal-2025-learning-negative
%X Generative models have become widely used in biomedical entity linking (BioEL) due to their excellent performance and efficient memory usage. However, these models are usually trained only with positive samples—entities that match the input mention’s identifier—and do not explicitly learn from hard negative samples, which are entities that look similar but have different meanings. To address this limitation, we introduce ANGEL (Learning from Negative Samples in Biomedical Generative Entity Linking), the first framework that trains generative BioEL models using negative samples. Specifically, a generative model is initially trained to generate positive entity names from the knowledge base for given input entities. Subsequently, both correct and incorrect outputs are gathered from the model’s top-k predictions. Finally, the model is updated to prioritize the correct predictions through preference optimization. Our models fine-tuned with ANGEL outperform the previous best baseline models by up to an average top-1 accuracy of 1.4% on five benchmarks. When incorporating our framework into pre-training, the performance improvement increases further to 1.7%, demonstrating its effectiveness in both the pre-training and fine-tuning stages. The code and model weights are available at https://github.com/dmis-lab/ANGEL.
%R 10.18653/v1/2025.findings-acl.558
%U https://aclanthology.org/2025.findings-acl.558/
%U https://doi.org/10.18653/v1/2025.findings-acl.558
%P 10714-10730
Markdown (Informal)
[Learning from Negative Samples in Biomedical Generative Entity Linking](https://aclanthology.org/2025.findings-acl.558/) (Kim et al., Findings 2025)
ACL