@inproceedings{zhuang-etal-2023-p,
title = "{P}-{MNER}: Cross Modal Correction Fusion Network with Prompt Learning for Multimodal Named Entity Recognitiong",
author = "Zhuang, Wang and
Yijia, Zhang and
Kang, An and
Xiaoying, Zhou and
Mingyu, Lu and
Hongfei, Lin",
editor = "Sun, Maosong and
Qin, Bing and
Qiu, Xipeng and
Jiang, Jing and
Han, Xianpei",
booktitle = "Proceedings of the 22nd Chinese National Conference on Computational Linguistics",
month = aug,
year = "2023",
address = "Harbin, China",
publisher = "Chinese Information Processing Society of China",
url = "https://aclanthology.org/2023.ccl-1.59/",
pages = "689--700",
language = "eng",
abstract = "{\textquotedblleft}Multimodal Named Entity Recognition (MNER) is a challenging task in social mediadue to the combination of text and image features. Previous MNER work has focused onpredicting entity information after fusing visual and text features. However, pre-traininglanguage models have already acquired vast amounts of knowledge during their pre-training process. To leverage this knowledge, we propose a prompt network for MNERtasks (P-MNER).To minimize the noise generated by irrelevant areas in the image, wedesign a visual feature extraction model (FRR) based on FasterRCNN and ResNet, whichuses fine-grained visual features to assist MNER tasks. Moreover, we introduce a textcorrection fusion module (TCFM) into the model to address visual bias during modalfusion. We employ the idea of a residual network to modify the fused features using theoriginal text features. Our experiments on two benchmark datasets demonstrate that ourproposed model outperforms existing MNER methods. P-MNER`s ability to leveragepre-training knowledge from language models, incorporate fine-grained visual features,and correct for visual bias, makes it a promising approach for multimodal named entityrecognition in social media posts.{\textquotedblright}"
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhuang-etal-2023-p">
<titleInfo>
    <title>P-MNER: Cross Modal Correction Fusion Network with Prompt Learning for Multimodal Named Entity Recognition</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wang</namePart>
<namePart type="family">Zhuang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhang</namePart>
<namePart type="family">Yijia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">An</namePart>
<namePart type="family">Kang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhou</namePart>
<namePart type="family">Xiaoying</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lu</namePart>
<namePart type="family">Mingyu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lin</namePart>
<namePart type="family">Hongfei</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 22nd Chinese National Conference on Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maosong</namePart>
<namePart type="family">Sun</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bing</namePart>
<namePart type="family">Qin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xipeng</namePart>
<namePart type="family">Qiu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jing</namePart>
<namePart type="family">Jiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xianpei</namePart>
<namePart type="family">Han</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Chinese Information Processing Society of China</publisher>
<place>
<placeTerm type="text">Harbin, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
  <abstract>Multimodal Named Entity Recognition (MNER) is a challenging task in social media due to the combination of text and image features. Previous MNER work has focused on predicting entity information after fusing visual and text features. However, pre-training language models have already acquired vast amounts of knowledge during their pre-training process. To leverage this knowledge, we propose a prompt network for MNER tasks (P-MNER). To minimize the noise generated by irrelevant areas in the image, we design a visual feature extraction model (FRR) based on FasterRCNN and ResNet, which uses fine-grained visual features to assist MNER tasks. Moreover, we introduce a text correction fusion module (TCFM) into the model to address visual bias during modal fusion. We employ the idea of a residual network to modify the fused features using the original text features. Our experiments on two benchmark datasets demonstrate that our proposed model outperforms existing MNER methods. P-MNER's ability to leverage pre-training knowledge from language models, incorporate fine-grained visual features, and correct for visual bias, makes it a promising approach for multimodal named entity recognition in social media posts.</abstract>
<identifier type="citekey">zhuang-etal-2023-p</identifier>
<location>
<url>https://aclanthology.org/2023.ccl-1.59/</url>
</location>
<part>
<date>2023-08</date>
<extent unit="page">
<start>689</start>
<end>700</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T P-MNER: Cross Modal Correction Fusion Network with Prompt Learning for Multimodal Named Entity Recognition
%A Zhuang, Wang
%A Yijia, Zhang
%A Kang, An
%A Xiaoying, Zhou
%A Mingyu, Lu
%A Hongfei, Lin
%Y Sun, Maosong
%Y Qin, Bing
%Y Qiu, Xipeng
%Y Jiang, Jing
%Y Han, Xianpei
%S Proceedings of the 22nd Chinese National Conference on Computational Linguistics
%D 2023
%8 August
%I Chinese Information Processing Society of China
%C Harbin, China
%G eng
%F zhuang-etal-2023-p
%X Multimodal Named Entity Recognition (MNER) is a challenging task in social media due to the combination of text and image features. Previous MNER work has focused on predicting entity information after fusing visual and text features. However, pre-training language models have already acquired vast amounts of knowledge during their pre-training process. To leverage this knowledge, we propose a prompt network for MNER tasks (P-MNER). To minimize the noise generated by irrelevant areas in the image, we design a visual feature extraction model (FRR) based on FasterRCNN and ResNet, which uses fine-grained visual features to assist MNER tasks. Moreover, we introduce a text correction fusion module (TCFM) into the model to address visual bias during modal fusion. We employ the idea of a residual network to modify the fused features using the original text features. Our experiments on two benchmark datasets demonstrate that our proposed model outperforms existing MNER methods. P-MNER's ability to leverage pre-training knowledge from language models, incorporate fine-grained visual features, and correct for visual bias, makes it a promising approach for multimodal named entity recognition in social media posts.
%U https://aclanthology.org/2023.ccl-1.59/
%P 689-700
Markdown (Informal)
[P-MNER: Cross Modal Correction Fusion Network with Prompt Learning for Multimodal Named Entity Recognition](https://aclanthology.org/2023.ccl-1.59/) (Zhuang et al., CCL 2023)