@inproceedings{bhattarai-etal-2025-heal,
title = "{HEAL}: Hierarchical Embedding Alignment Loss for Improved Retrieval and Representation Learning",
author = "Bhattarai, Manish and
Barron, Ryan and
Eren, Maksim and
Vu, Minh and
Grantcharov, Vesselin and
Boureima, Ismael and
Stanev, Valentin and
Matuszek, Cynthia and
Valtchinov, Vladimir and
Rasmussen, Kim and
Alexandrov, Boian",
editor = "Shi, Weijia and
Yu, Wenhao and
Asai, Akari and
Jiang, Meng and
Durrett, Greg and
Hajishirzi, Hannaneh and
Zettlemoyer, Luke",
booktitle = "Proceedings of the 4th International Workshop on Knowledge-Augmented Methods for Natural Language Processing",
month = may,
year = "2025",
address = "Albuquerque, New Mexico, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.knowledgenlp-1.19/",
doi = "10.18653/v1/2025.knowledgenlp-1.19",
pages = "205--214",
ISBN = "979-8-89176-229-9",
abstract = "Retrieval-Augmented Generation (RAG) enhances Large Language Models (LLMs) by integrating external document retrieval to provide domain-specific or up-to-date knowledge. The effectiveness of RAG depends on the relevance of retrieved documents, which is influenced by the semantic alignment of embeddings with the domain{'}s specialized content. Although full fine-tuning can align language models to specific domains, it is computationally intensive and demands substantial data. This paper introduces Hierarchical Embedding Alignment Loss (HEAL), a novel method that leverages hierarchical fuzzy clustering with matrix factorization within contrastive learning to efficiently align LLM embeddings with domain-specific content. HEAL computes level/depth-wise contrastive losses and incorporates hierarchical penalties to align embeddings with the underlying relationships in label hierarchies. This approach enhances retrieval relevance and document classification, effectively reducing hallucinations in LLM outputs. In our experiments, we benchmark and evaluate HEAL across diverse domains, including Healthcare, Material Science, Cyber-security, and Applied Maths."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bhattarai-etal-2025-heal">
<titleInfo>
<title>HEAL: Hierarchical Embedding Alignment Loss for Improved Retrieval and Representation Learning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Manish</namePart>
<namePart type="family">Bhattarai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ryan</namePart>
<namePart type="family">Barron</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maksim</namePart>
<namePart type="family">Eren</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Minh</namePart>
<namePart type="family">Vu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vesselin</namePart>
<namePart type="family">Grantcharov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ismael</namePart>
<namePart type="family">Boureima</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Valentin</namePart>
<namePart type="family">Stanev</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cynthia</namePart>
<namePart type="family">Matuszek</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vladimir</namePart>
<namePart type="family">Valtchinov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kim</namePart>
<namePart type="family">Rasmussen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Boian</namePart>
<namePart type="family">Alexandrov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 4th International Workshop on Knowledge-Augmented Methods for Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Weijia</namePart>
<namePart type="family">Shi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wenhao</namePart>
<namePart type="family">Yu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Akari</namePart>
<namePart type="family">Asai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Meng</namePart>
<namePart type="family">Jiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Greg</namePart>
<namePart type="family">Durrett</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hannaneh</namePart>
<namePart type="family">Hajishirzi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luke</namePart>
<namePart type="family">Zettlemoyer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Albuquerque, New Mexico, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-229-9</identifier>
</relatedItem>
<abstract>Retrieval-Augmented Generation (RAG) enhances Large Language Models (LLMs) by integrating external document retrieval to provide domain-specific or up-to-date knowledge. The effectiveness of RAG depends on the relevance of retrieved documents, which is influenced by the semantic alignment of embeddings with the domain’s specialized content. Although full fine-tuning can align language models to specific domains, it is computationally intensive and demands substantial data. This paper introduces Hierarchical Embedding Alignment Loss (HEAL), a novel method that leverages hierarchical fuzzy clustering with matrix factorization within contrastive learning to efficiently align LLM embeddings with domain-specific content. HEAL computes level/depth-wise contrastive losses and incorporates hierarchical penalties to align embeddings with the underlying relationships in label hierarchies. This approach enhances retrieval relevance and document classification, effectively reducing hallucinations in LLM outputs. In our experiments, we benchmark and evaluate HEAL across diverse domains, including Healthcare, Material Science, Cyber-security, and Applied Maths.</abstract>
<identifier type="citekey">bhattarai-etal-2025-heal</identifier>
<identifier type="doi">10.18653/v1/2025.knowledgenlp-1.19</identifier>
<location>
<url>https://aclanthology.org/2025.knowledgenlp-1.19/</url>
</location>
<part>
<date>2025-05</date>
<extent unit="page">
<start>205</start>
<end>214</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T HEAL: Hierarchical Embedding Alignment Loss for Improved Retrieval and Representation Learning
%A Bhattarai, Manish
%A Barron, Ryan
%A Eren, Maksim
%A Vu, Minh
%A Grantcharov, Vesselin
%A Boureima, Ismael
%A Stanev, Valentin
%A Matuszek, Cynthia
%A Valtchinov, Vladimir
%A Rasmussen, Kim
%A Alexandrov, Boian
%Y Shi, Weijia
%Y Yu, Wenhao
%Y Asai, Akari
%Y Jiang, Meng
%Y Durrett, Greg
%Y Hajishirzi, Hannaneh
%Y Zettlemoyer, Luke
%S Proceedings of the 4th International Workshop on Knowledge-Augmented Methods for Natural Language Processing
%D 2025
%8 May
%I Association for Computational Linguistics
%C Albuquerque, New Mexico, USA
%@ 979-8-89176-229-9
%F bhattarai-etal-2025-heal
%X Retrieval-Augmented Generation (RAG) enhances Large Language Models (LLMs) by integrating external document retrieval to provide domain-specific or up-to-date knowledge. The effectiveness of RAG depends on the relevance of retrieved documents, which is influenced by the semantic alignment of embeddings with the domain’s specialized content. Although full fine-tuning can align language models to specific domains, it is computationally intensive and demands substantial data. This paper introduces Hierarchical Embedding Alignment Loss (HEAL), a novel method that leverages hierarchical fuzzy clustering with matrix factorization within contrastive learning to efficiently align LLM embeddings with domain-specific content. HEAL computes level/depth-wise contrastive losses and incorporates hierarchical penalties to align embeddings with the underlying relationships in label hierarchies. This approach enhances retrieval relevance and document classification, effectively reducing hallucinations in LLM outputs. In our experiments, we benchmark and evaluate HEAL across diverse domains, including Healthcare, Material Science, Cyber-security, and Applied Maths.
%R 10.18653/v1/2025.knowledgenlp-1.19
%U https://aclanthology.org/2025.knowledgenlp-1.19/
%U https://doi.org/10.18653/v1/2025.knowledgenlp-1.19
%P 205-214
Markdown (Informal)
[HEAL: Hierarchical Embedding Alignment Loss for Improved Retrieval and Representation Learning](https://aclanthology.org/2025.knowledgenlp-1.19/) (Bhattarai et al., KnowledgeNLP 2025)
ACL
- Manish Bhattarai, Ryan Barron, Maksim Eren, Minh Vu, Vesselin Grantcharov, Ismael Boureima, Valentin Stanev, Cynthia Matuszek, Vladimir Valtchinov, Kim Rasmussen, and Boian Alexandrov. 2025. HEAL: Hierarchical Embedding Alignment Loss for Improved Retrieval and Representation Learning. In Proceedings of the 4th International Workshop on Knowledge-Augmented Methods for Natural Language Processing, pages 205–214, Albuquerque, New Mexico, USA. Association for Computational Linguistics.