% BibTeX export of the ACL Anthology record 2025.acl-long.764 (conference paper).
@inproceedings{rucker-akbik-2025-evaluating,
  title     = {Evaluating Design Decisions for Dual Encoder-based Entity Disambiguation},
  author    = {R{\"u}cker, Susanna and
               Akbik, Alan},
  editor    = {Che, Wanxiang and
               Nabende, Joyce and
               Shutova, Ekaterina and
               Pilehvar, Mohammad Taher},
  booktitle = {Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.acl-long.764/},
  doi       = {10.18653/v1/2025.acl-long.764},
  pages     = {15685--15701},
  isbn      = {979-8-89176-251-0},
  abstract  = {Entity disambiguation (ED) is the task of linking mentions in text to corresponding entries in a knowledge base. Dual Encoders address this by embedding mentions and label candidates in a shared embedding space and applying a similarity metric to predict the correct label. In this work, we focus on evaluating key design decisions for Dual Encoder-based ED, such as its loss function, similarity metric, label verbalization format, and negative sampling strategy. We present the resulting model VerbalizED, a document-level Dual Encoder model that includes contextual label verbalizations and efficient hard negative sampling. Additionally, we explore an iterative prediction variant that aims to improve the disambiguation of challenging data points. To support our analysis, we first conduct comprehensive ablation experiments on specific design decisions using AIDA-Yago, followed by large-scale, multi-domain evaluation on the ZELDA benchmark.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record; its ID equals the citekey identifier given further down. -->
  <mods ID="rucker-akbik-2025-evaluating">
    <titleInfo>
      <title>Evaluating Design Decisions for Dual Encoder-based Entity Disambiguation</title>
    </titleInfo>

    <!-- Authors of the paper -->
    <name type="personal">
      <namePart type="given">Susanna</namePart>
      <namePart type="family">Rücker</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Alan</namePart>
      <namePart type="family">Akbik</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>

    <originInfo>
      <dateIssued>2025-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume, with its editors, publisher, place and ISBN -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Wanxiang</namePart>
        <namePart type="family">Che</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Joyce</namePart>
        <namePart type="family">Nabende</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ekaterina</namePart>
        <namePart type="family">Shutova</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <!-- Two given-name parts are recorded for this editor -->
        <namePart type="given">Mohammad</namePart>
        <namePart type="given">Taher</namePart>
        <namePart type="family">Pilehvar</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Vienna, Austria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-251-0</identifier>
    </relatedItem>

    <abstract>Entity disambiguation (ED) is the task of linking mentions in text to corresponding entries in a knowledge base. Dual Encoders address this by embedding mentions and label candidates in a shared embedding space and applying a similarity metric to predict the correct label. In this work, we focus on evaluating key design decisions for Dual Encoder-based ED, such as its loss function, similarity metric, label verbalization format, and negative sampling strategy. We present the resulting model VerbalizED, a document-level Dual Encoder model that includes contextual label verbalizations and efficient hard negative sampling. Additionally, we explore an iterative prediction variant that aims to improve the disambiguation of challenging data points. To support our analysis, we first conduct comprehensive ablation experiments on specific design decisions using AIDA-Yago, followed by large-scale, multi-domain evaluation on the ZELDA benchmark.</abstract>

    <!-- Identifiers and landing page for the paper itself -->
    <identifier type="citekey">rucker-akbik-2025-evaluating</identifier>
    <identifier type="doi">10.18653/v1/2025.acl-long.764</identifier>
    <location>
      <url>https://aclanthology.org/2025.acl-long.764/</url>
    </location>

    <!-- Date and page extent of the paper -->
    <part>
      <date>2025-07</date>
      <extent unit="page">
        <start>15685</start>
        <end>15701</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Evaluating Design Decisions for Dual Encoder-based Entity Disambiguation
%A Rücker, Susanna
%A Akbik, Alan
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-251-0
%F rucker-akbik-2025-evaluating
%X Entity disambiguation (ED) is the task of linking mentions in text to corresponding entries in a knowledge base. Dual Encoders address this by embedding mentions and label candidates in a shared embedding space and applying a similarity metric to predict the correct label. In this work, we focus on evaluating key design decisions for Dual Encoder-based ED, such as its loss function, similarity metric, label verbalization format, and negative sampling strategy. We present the resulting model VerbalizED, a document-level Dual Encoder model that includes contextual label verbalizations and efficient hard negative sampling. Additionally, we explore an iterative prediction variant that aims to improve the disambiguation of challenging data points. To support our analysis, we first conduct comprehensive ablation experiments on specific design decisions using AIDA-Yago, followed by large-scale, multi-domain evaluation on the ZELDA benchmark.
%R 10.18653/v1/2025.acl-long.764
%U https://aclanthology.org/2025.acl-long.764/
%U https://doi.org/10.18653/v1/2025.acl-long.764
%P 15685-15701
Markdown (Informal)
[Evaluating Design Decisions for Dual Encoder-based Entity Disambiguation](https://aclanthology.org/2025.acl-long.764/) (Rücker & Akbik, ACL 2025)
ACL