@inproceedings{ibrahim-etal-2025-learning,
title = "Learning Word Embeddings from Glosses: A Multi-Loss Framework for {A}rabic Reverse Dictionary Tasks",
author = "Ibrahim, Engy and
Adel, Farhah and
Torki, Marwan and
El-Makky, Nagwa",
editor = "Darwish, Kareem and
Ali, Ahmed and
Abu Farha, Ibrahim and
Touileb, Samia and
Zitouni, Imed and
Abdelali, Ahmed and
Al-Ghamdi, Sharefah and
Alkhereyf, Sakhar and
Zaghouani, Wajdi and
Khalifa, Salam and
AlKhamissi, Badr and
Almatham, Rawan and
Hamed, Injy and
Alyafeai, Zaid and
Alowisheq, Areeb and
Inoue, Go and
Mrini, Khalil and
Alshammari, Waad",
booktitle = "Proceedings of The Third Arabic Natural Language Processing Conference",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.arabicnlp-main.31/",
pages = "384--388",
ISBN = "979-8-89176-352-4",
abstract = "We address the task of reverse dictionary modeling in Arabic, where the goal is to retrieve a target word given its definition. The task comprises two subtasks: (1) generating embeddings for Arabic words based on Arabic glosses, and (2) a cross-lingual setting where the gloss is in English and the target embedding is for the corresponding Arabic word. Prior approaches have largely relied on BERT models such as CAMeLBERT or MARBERT trained with mean squared error loss. In contrast, we propose a novel ensemble architecture that combines MARBERTv2 with the encoder of AraBART, and we demonstrate that the choice of loss function has a significant impact on performance. We apply contrastive loss to improve representational alignment, and introduce structural and center losses to better capture the semantic distribution of the dataset. This multi-loss framework enhances the quality of the learned embeddings and leads to consistent improvements in both monolingual and cross-lingual settings. Our system achieved the best rank metric in both subtasks compared to the previous approaches. These results highlight the effectiveness of combining architectural diversity with task-specific loss functions in representational tasks for morphologically rich languages like Arabic."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ibrahim-etal-2025-learning">
<titleInfo>
<title>Learning Word Embeddings from Glosses: A Multi-Loss Framework for Arabic Reverse Dictionary Tasks</title>
</titleInfo>
<name type="personal">
<namePart type="given">Engy</namePart>
<namePart type="family">Ibrahim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Farhah</namePart>
<namePart type="family">Adel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marwan</namePart>
<namePart type="family">Torki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nagwa</namePart>
<namePart type="family">El-Makky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of The Third Arabic Natural Language Processing Conference</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kareem</namePart>
<namePart type="family">Darwish</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ahmed</namePart>
<namePart type="family">Ali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ibrahim</namePart>
<namePart type="family">Abu Farha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Samia</namePart>
<namePart type="family">Touileb</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Imed</namePart>
<namePart type="family">Zitouni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ahmed</namePart>
<namePart type="family">Abdelali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sharefah</namePart>
<namePart type="family">Al-Ghamdi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakhar</namePart>
<namePart type="family">Alkhereyf</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wajdi</namePart>
<namePart type="family">Zaghouani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Salam</namePart>
<namePart type="family">Khalifa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Badr</namePart>
<namePart type="family">AlKhamissi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rawan</namePart>
<namePart type="family">Almatham</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Injy</namePart>
<namePart type="family">Hamed</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zaid</namePart>
<namePart type="family">Alyafeai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Areeb</namePart>
<namePart type="family">Alowisheq</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Go</namePart>
<namePart type="family">Inoue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalil</namePart>
<namePart type="family">Mrini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Waad</namePart>
<namePart type="family">Alshammari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-352-4</identifier>
</relatedItem>
<abstract>We address the task of reverse dictionary modeling in Arabic, where the goal is to retrieve a target word given its definition. The task comprises two subtasks: (1) generating embeddings for Arabic words based on Arabic glosses, and (2) a cross-lingual setting where the gloss is in English and the target embedding is for the corresponding Arabic word. Prior approaches have largely relied on BERT models such as CAMeLBERT or MARBERT trained with mean squared error loss. In contrast, we propose a novel ensemble architecture that combines MARBERTv2 with the encoder of AraBART, and we demonstrate that the choice of loss function has a significant impact on performance. We apply contrastive loss to improve representational alignment, and introduce structural and center losses to better capture the semantic distribution of the dataset. This multi-loss framework enhances the quality of the learned embeddings and leads to consistent improvements in both monolingual and cross-lingual settings. Our system achieved the best rank metric in both subtasks compared to the previous approaches. These results highlight the effectiveness of combining architectural diversity with task-specific loss functions in representational tasks for morphologically rich languages like Arabic.</abstract>
<identifier type="citekey">ibrahim-etal-2025-learning</identifier>
<location>
<url>https://aclanthology.org/2025.arabicnlp-main.31/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>384</start>
<end>388</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Learning Word Embeddings from Glosses: A Multi-Loss Framework for Arabic Reverse Dictionary Tasks
%A Ibrahim, Engy
%A Adel, Farhah
%A Torki, Marwan
%A El-Makky, Nagwa
%Y Darwish, Kareem
%Y Ali, Ahmed
%Y Abu Farha, Ibrahim
%Y Touileb, Samia
%Y Zitouni, Imed
%Y Abdelali, Ahmed
%Y Al-Ghamdi, Sharefah
%Y Alkhereyf, Sakhar
%Y Zaghouani, Wajdi
%Y Khalifa, Salam
%Y AlKhamissi, Badr
%Y Almatham, Rawan
%Y Hamed, Injy
%Y Alyafeai, Zaid
%Y Alowisheq, Areeb
%Y Inoue, Go
%Y Mrini, Khalil
%Y Alshammari, Waad
%S Proceedings of The Third Arabic Natural Language Processing Conference
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-352-4
%F ibrahim-etal-2025-learning
%X We address the task of reverse dictionary modeling in Arabic, where the goal is to retrieve a target word given its definition. The task comprises two subtasks: (1) generating embeddings for Arabic words based on Arabic glosses, and (2) a cross-lingual setting where the gloss is in English and the target embedding is for the corresponding Arabic word. Prior approaches have largely relied on BERT models such as CAMeLBERT or MARBERT trained with mean squared error loss. In contrast, we propose a novel ensemble architecture that combines MARBERTv2 with the encoder of AraBART, and we demonstrate that the choice of loss function has a significant impact on performance. We apply contrastive loss to improve representational alignment, and introduce structural and center losses to better capture the semantic distribution of the dataset. This multi-loss framework enhances the quality of the learned embeddings and leads to consistent improvements in both monolingual and cross-lingual settings. Our system achieved the best rank metric in both subtasks compared to the previous approaches. These results highlight the effectiveness of combining architectural diversity with task-specific loss functions in representational tasks for morphologically rich languages like Arabic.
%U https://aclanthology.org/2025.arabicnlp-main.31/
%P 384-388
Markdown (Informal)
[Learning Word Embeddings from Glosses: A Multi-Loss Framework for Arabic Reverse Dictionary Tasks](https://aclanthology.org/2025.arabicnlp-main.31/) (Ibrahim et al., ArabicNLP 2025)
ACL
Engy Ibrahim, Farhah Adel, Marwan Torki, and Nagwa El-Makky. 2025. Learning Word Embeddings from Glosses: A Multi-Loss Framework for Arabic Reverse Dictionary Tasks. In Proceedings of The Third Arabic Natural Language Processing Conference, pages 384–388, Suzhou, China. Association for Computational Linguistics.
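As a rough illustration of the multi-loss objective described in the abstract (MSE regression on target embeddings, plus contrastive and center-loss terms), here is a minimal PyTorch-style sketch. This is not the authors' implementation: the loss weights, temperature, and the exact form of the center term are assumptions for illustration, and the paper's structural loss is omitted here.

```python
# Minimal sketch of a multi-loss objective for reverse-dictionary training:
# predict a target word embedding from a gloss encoding, combining
# (1) MSE regression, (2) an in-batch contrastive (InfoNCE-style) term,
# and (3) a center-loss-style term. All names and weights are hypothetical.
import torch
import torch.nn.functional as F

def multi_loss(pred, target, labels=None, centers=None,
               temperature=0.07, w_mse=1.0, w_con=0.5, w_cen=0.1):
    # pred:   (B, D) gloss-derived embeddings from the encoder
    # target: (B, D) gold word embeddings
    loss = w_mse * F.mse_loss(pred, target)

    # In-batch contrastive term: each gloss embedding should match its own
    # target embedding against the other targets in the batch.
    p = F.normalize(pred, dim=-1)
    t = F.normalize(target, dim=-1)
    logits = p @ t.T / temperature                       # (B, B) similarities
    idx = torch.arange(pred.size(0), device=pred.device)
    loss = loss + w_con * F.cross_entropy(logits, idx)

    # Center-loss-style term: pull each prediction toward the (precomputed)
    # center of its class; a stand-in for the paper's center loss.
    if centers is not None and labels is not None:
        loss = loss + w_cen * F.mse_loss(pred, centers[labels])
    return loss

if __name__ == "__main__":
    B, D = 8, 768
    pred, target = torch.randn(B, D), torch.randn(B, D)
    print(multi_loss(pred, target).item())
```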