@inproceedings{xiao-etal-2025-multi,
title = "Multi-Modal Entities Matter: Benchmarking Multi-Modal Entity Alignment",
author = "Xiao, GuanChen and
Zeng, WeiXin and
Zhang, ShiQi and
Lao, MingRui and
Zhao, Xiang",
editor = "Rambow, Owen and
Wanner, Leo and
Apidianaki, Marianna and
Al-Khalifa, Hend and
Eugenio, Barbara Di and
Schockaert, Steven",
booktitle = "Proceedings of the 31st International Conference on Computational Linguistics",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.coling-main.582/",
pages = "8714--8724",
abstract = "Multi-modal entity alignment (MMEA) is a long-standing task that aims to discover identical entities between different multi-modal knowledge graphs (MMKGs). However, most of the existing MMEA datasets consider the multi-modal data as the attributes of textual entities, while neglecting the correlations among the multi-modal data and do not fit in the real-world scenarios well. In response, in this work, we establish a novel yet practical MMEA dataset, i.e. NMMEA, which models multi-modal data (e.g., images) equally as textual entities in the MMKG. Due to the introduction of multi-modal data, NMMEA poses new challenges to existing MMEA solutions, i.e., heterogeneous structural representation learning and cross-modal alignment inference. Hence, we put forward a simple yet effective solution, CrossEA, which can effectively learn the structural information of entities by considering both intra-modal and cross-modal relations, and further infer the similarity of different types of entity pairs. Extensive experiments validate the significance of NMMEA, where CrossEA can achieve superior performance in contrast to competitive methods on the proposed dataset."
}
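For working with the BibTeX entry above in code, here is a minimal sketch; it assumes the third-party bibtexparser package (v1.x API) is installed and that the entry has been saved to a hypothetical local file:

# Minimal sketch: load the BibTeX record above with bibtexparser (v1.x API).
# "xiao2025.bib" is a hypothetical file name; common_strings=True lets the
# parser resolve the unquoted "jan" month macro used in the entry.
import bibtexparser
from bibtexparser.bparser import BibTexParser

with open("xiao2025.bib") as f:
    db = bibtexparser.load(f, parser=BibTexParser(common_strings=True))

entry = db.entries[0]                     # the single entry in this file
print(entry["title"])                     # paper title
print(entry["author"].split(" and "))     # list of author names
print(entry["pages"])                     # "8714--8724"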
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="xiao-etal-2025-multi">
<titleInfo>
<title>Multi-Modal Entities Matter: Benchmarking Multi-Modal Entity Alignment</title>
</titleInfo>
<name type="personal">
<namePart type="given">GuanChen</namePart>
<namePart type="family">Xiao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">WeiXin</namePart>
<namePart type="family">Zeng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">ShiQi</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">MingRui</namePart>
<namePart type="family">Lao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiang</namePart>
<namePart type="family">Zhao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 31st International Conference on Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Owen</namePart>
<namePart type="family">Rambow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leo</namePart>
<namePart type="family">Wanner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marianna</namePart>
<namePart type="family">Apidianaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hend</namePart>
<namePart type="family">Al-Khalifa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Barbara</namePart>
<namePart type="given">Di</namePart>
<namePart type="family">Eugenio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Schockaert</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Multi-modal entity alignment (MMEA) is a long-standing task that aims to discover identical entities across different multi-modal knowledge graphs (MMKGs). However, most existing MMEA datasets treat multi-modal data merely as attributes of textual entities, neglecting the correlations among the multi-modal data and fitting real-world scenarios poorly. In response, we establish a novel yet practical MMEA dataset, NMMEA, which models multi-modal data (e.g., images) as entities on an equal footing with textual entities in the MMKG. The introduction of such multi-modal entities poses new challenges to existing MMEA solutions, namely heterogeneous structural representation learning and cross-modal alignment inference. Hence, we put forward a simple yet effective solution, CrossEA, which learns the structural information of entities by considering both intra-modal and cross-modal relations, and further infers the similarity of different types of entity pairs. Extensive experiments validate the significance of NMMEA, on which CrossEA achieves superior performance compared with competitive methods.</abstract>
<identifier type="citekey">xiao-etal-2025-multi</identifier>
<location>
<url>https://aclanthology.org/2025.coling-main.582/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>8714</start>
<end>8724</end>
</extent>
</part>
</mods>
</modsCollection>
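If the MODS XML record above needs to be consumed programmatically, a minimal standard-library sketch (the file name xiao2025.xml is hypothetical):

# Minimal sketch: extract a few fields from the MODS record above using
# only Python's standard library. The local file name is hypothetical.
import xml.etree.ElementTree as ET

NS = {"m": "http://www.loc.gov/mods/v3"}
root = ET.parse("xiao2025.xml").getroot()        # <modsCollection>
record = root.find("m:mods", NS)                 # the single <mods> record

title = record.findtext("m:titleInfo/m:title", namespaces=NS)
authors = [
    "{} {}".format(
        n.findtext("m:namePart[@type='given']", namespaces=NS),
        n.findtext("m:namePart[@type='family']", namespaces=NS),
    )
    for n in record.findall("m:name", NS)        # direct children: the authors
]
url = record.findtext("m:location/m:url", namespaces=NS)
pages = (
    record.findtext("m:part/m:extent/m:start", namespaces=NS),
    record.findtext("m:part/m:extent/m:end", namespaces=NS),
)
print(title)
print(authors)
print(url, pages)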
%0 Conference Proceedings
%T Multi-Modal Entities Matter: Benchmarking Multi-Modal Entity Alignment
%A Xiao, GuanChen
%A Zeng, WeiXin
%A Zhang, ShiQi
%A Lao, MingRui
%A Zhao, Xiang
%Y Rambow, Owen
%Y Wanner, Leo
%Y Apidianaki, Marianna
%Y Al-Khalifa, Hend
%Y Eugenio, Barbara Di
%Y Schockaert, Steven
%S Proceedings of the 31st International Conference on Computational Linguistics
%D 2025
%8 January
%I Association for Computational Linguistics
%C Abu Dhabi, UAE
%F xiao-etal-2025-multi
%X Multi-modal entity alignment (MMEA) is a long-standing task that aims to discover identical entities across different multi-modal knowledge graphs (MMKGs). However, most existing MMEA datasets treat multi-modal data merely as attributes of textual entities, neglecting the correlations among the multi-modal data and fitting real-world scenarios poorly. In response, we establish a novel yet practical MMEA dataset, NMMEA, which models multi-modal data (e.g., images) as entities on an equal footing with textual entities in the MMKG. The introduction of such multi-modal entities poses new challenges to existing MMEA solutions, namely heterogeneous structural representation learning and cross-modal alignment inference. Hence, we put forward a simple yet effective solution, CrossEA, which learns the structural information of entities by considering both intra-modal and cross-modal relations, and further infers the similarity of different types of entity pairs. Extensive experiments validate the significance of NMMEA, on which CrossEA achieves superior performance compared with competitive methods.
%U https://aclanthology.org/2025.coling-main.582/
%P 8714-8724
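The Endnote/Refer-style record above (one "%"-tagged field per line) can likewise be read with a few lines of standard-library Python; the file name is hypothetical:

# Minimal sketch: collect the "%"-tagged fields above into a dict of tag -> values.
record = {}
with open("xiao2025.enw") as f:                  # hypothetical file name
    for line in f:
        if line.startswith("%"):
            tag, _, value = line.rstrip("\n").partition(" ")
            record.setdefault(tag, []).append(value)

print(record["%T"][0])    # title
print(record["%A"])       # the five authors
print(record["%P"][0])    # page range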
Markdown (Informal)
[Multi-Modal Entities Matter: Benchmarking Multi-Modal Entity Alignment](https://aclanthology.org/2025.coling-main.582/) (Xiao et al., COLING 2025)
ACL
GuanChen Xiao, WeiXin Zeng, ShiQi Zhang, MingRui Lao, and Xiang Zhao. 2025. [Multi-Modal Entities Matter: Benchmarking Multi-Modal Entity Alignment](https://aclanthology.org/2025.coling-main.582/). In *Proceedings of the 31st International Conference on Computational Linguistics*, pages 8714–8724, Abu Dhabi, UAE. Association for Computational Linguistics.