@inproceedings{lu-etal-2024-shot,
title = "Few-Shot Multimodal Named Entity Recognition Based on Mutlimodal Causal Intervention Graph",
author = "Lu, Feihong and
Yang, Xiaocui and
Li, Qian and
Sun, Qingyun and
Jiang, Ke and
Ji, Cheng and
Li, Jianxin",
editor = "Calzolari, Nicoletta and
Kan, Min-Yen and
Hoste, Veronique and
Lenci, Alessandro and
Sakti, Sakriani and
Xue, Nianwen",
booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.lrec-main.633",
pages = "7208--7219",
abstract = "Multimodal Named Entity Recognition (MNER) models typically require a significant volume of labeled data for effective training to extract relations between entities. In real-world scenarios, we frequently encounter unseen relation types. Nevertheless, existing methods are predominantly tailored for complete datasets and are not equipped to handle these new relation types. In this paper, we introduce the Few-shot Multimodal Named Entity Recognition (FMNER) task to address these novel relation types. FMNER trains in the source domain (seen types) and tests in the target domain (unseen types) with different distributions. Due to limited available resources for sampling, each sampling instance yields different content, resulting in data bias and alignment problems of multimodal units (image patches and words). To alleviate the above challenge, we propose a novel Multimodal causal Intervention graphs (MOUSING) model for FMNER. Specifically, we begin by constructing a multimodal graph that incorporates fine-grained information from multiple modalities. Subsequently, we introduce the Multimodal Causal Intervention Strategy to update the multimodal graph. It aims to decrease spurious correlations and emphasize accurate correlations between multimodal units, resulting in effectively aligned multimodal representations. Extensive experiments on two multimodal named entity recognition datasets demonstrate the superior performance of our model in the few-shot setting.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="lu-etal-2024-shot">
<titleInfo>
<title>Few-Shot Multimodal Named Entity Recognition Based on Mutlimodal Causal Intervention Graph</title>
</titleInfo>
<name type="personal">
<namePart type="given">Feihong</namePart>
<namePart type="family">Lu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaocui</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Qian</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Qingyun</namePart>
<namePart type="family">Sun</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ke</namePart>
<namePart type="family">Jiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cheng</namePart>
<namePart type="family">Ji</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jianxin</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronique</namePart>
<namePart type="family">Hoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Multimodal Named Entity Recognition (MNER) models typically require a significant volume of labeled data for effective training to extract relations between entities. In real-world scenarios, we frequently encounter unseen relation types. Nevertheless, existing methods are predominantly tailored for complete datasets and are not equipped to handle these new relation types. In this paper, we introduce the Few-shot Multimodal Named Entity Recognition (FMNER) task to address these novel relation types. FMNER trains in the source domain (seen types) and tests in the target domain (unseen types) with different distributions. Due to limited available resources for sampling, each sampling instance yields different content, resulting in data bias and alignment problems of multimodal units (image patches and words). To alleviate the above challenge, we propose a novel Multimodal causal Intervention graphs (MOUSING) model for FMNER. Specifically, we begin by constructing a multimodal graph that incorporates fine-grained information from multiple modalities. Subsequently, we introduce the Multimodal Causal Intervention Strategy to update the multimodal graph. It aims to decrease spurious correlations and emphasize accurate correlations between multimodal units, resulting in effectively aligned multimodal representations. Extensive experiments on two multimodal named entity recognition datasets demonstrate the superior performance of our model in the few-shot setting.</abstract>
<identifier type="citekey">lu-etal-2024-shot</identifier>
<location>
<url>https://aclanthology.org/2024.lrec-main.633</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>7208</start>
<end>7219</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Few-Shot Multimodal Named Entity Recognition Based on Mutlimodal Causal Intervention Graph
%A Lu, Feihong
%A Yang, Xiaocui
%A Li, Qian
%A Sun, Qingyun
%A Jiang, Ke
%A Ji, Cheng
%A Li, Jianxin
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F lu-etal-2024-shot
%X Multimodal Named Entity Recognition (MNER) models typically require a significant volume of labeled data for effective training to extract relations between entities. In real-world scenarios, we frequently encounter unseen relation types. Nevertheless, existing methods are predominantly tailored for complete datasets and are not equipped to handle these new relation types. In this paper, we introduce the Few-shot Multimodal Named Entity Recognition (FMNER) task to address these novel relation types. FMNER trains in the source domain (seen types) and tests in the target domain (unseen types) with different distributions. Due to limited available resources for sampling, each sampling instance yields different content, resulting in data bias and alignment problems of multimodal units (image patches and words). To alleviate the above challenge, we propose a novel Multimodal causal Intervention graphs (MOUSING) model for FMNER. Specifically, we begin by constructing a multimodal graph that incorporates fine-grained information from multiple modalities. Subsequently, we introduce the Multimodal Causal Intervention Strategy to update the multimodal graph. It aims to decrease spurious correlations and emphasize accurate correlations between multimodal units, resulting in effectively aligned multimodal representations. Extensive experiments on two multimodal named entity recognition datasets demonstrate the superior performance of our model in the few-shot setting.
%U https://aclanthology.org/2024.lrec-main.633
%P 7208-7219
Markdown (Informal)
[Few-Shot Multimodal Named Entity Recognition Based on Mutlimodal Causal Intervention Graph](https://aclanthology.org/2024.lrec-main.633) (Lu et al., LREC-COLING 2024)
ACL
- Feihong Lu, Xiaocui Yang, Qian Li, Qingyun Sun, Ke Jiang, Cheng Ji, and Jianxin Li. 2024. Few-Shot Multimodal Named Entity Recognition Based on Mutlimodal Causal Intervention Graph. In Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024), pages 7208–7219, Torino, Italia. ELRA and ICCL.