@inproceedings{zheng-etal-2024-enhancing,
title = "Enhancing Semantics in Multimodal Chain of Thought via Soft Negative Sampling",
author = "Zheng, Guangmin and
Wang, Jin and
Zhou, Xiaobing and
Zhang, Xuejie",
editor = "Calzolari, Nicoletta and
Kan, Min-Yen and
Hoste, Veronique and
Lenci, Alessandro and
Sakti, Sakriani and
Xue, Nianwen",
booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.lrec-main.537",
pages = "6059--6076",
abstract = "Chain of thought (CoT) has proven useful for problems requiring complex reasoning. Many of these problems are both textual and multimodal. Given the inputs in different modalities, a model generates a rationale and then uses it to answer a question. Because of the hallucination issue, the generated soft negative rationales with high textual quality but illogical semantics do not always help improve answer accuracy. This study proposes a rationale generation method using soft negative sampling (SNSE-CoT) to mitigate hallucinations in multimodal CoT. Five methods were applied to generate soft negative samples that shared highly similar text but had different semantics from the original. Bidirectional margin loss (BML) was applied to introduce them into the traditional contrastive learning framework that involves only positive and negative samples. Extensive experiments on the ScienceQA dataset demonstrated the effectiveness of the proposed method. Code and data are released at https://github.com/zgMin/SNSE-CoT.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zheng-etal-2024-enhancing">
<titleInfo>
<title>Enhancing Semantics in Multimodal Chain of Thought via Soft Negative Sampling</title>
</titleInfo>
<name type="personal">
<namePart type="given">Guangmin</namePart>
<namePart type="family">Zheng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jin</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaobing</namePart>
<namePart type="family">Zhou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xuejie</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronique</namePart>
<namePart type="family">Hoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Chain of thought (CoT) has proven useful for problems requiring complex reasoning. Many of these problems are both textual and multimodal. Given the inputs in different modalities, a model generates a rationale and then uses it to answer a question. Because of the hallucination issue, the generated soft negative rationales with high textual quality but illogical semantics do not always help improve answer accuracy. This study proposes a rationale generation method using soft negative sampling (SNSE-CoT) to mitigate hallucinations in multimodal CoT. Five methods were applied to generate soft negative samples that shared highly similar text but had different semantics from the original. Bidirectional margin loss (BML) was applied to introduce them into the traditional contrastive learning framework that involves only positive and negative samples. Extensive experiments on the ScienceQA dataset demonstrated the effectiveness of the proposed method. Code and data are released at https://github.com/zgMin/SNSE-CoT.</abstract>
<identifier type="citekey">zheng-etal-2024-enhancing</identifier>
<location>
<url>https://aclanthology.org/2024.lrec-main.537</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>6059</start>
<end>6076</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Enhancing Semantics in Multimodal Chain of Thought via Soft Negative Sampling
%A Zheng, Guangmin
%A Wang, Jin
%A Zhou, Xiaobing
%A Zhang, Xuejie
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F zheng-etal-2024-enhancing
%X Chain of thought (CoT) has proven useful for problems requiring complex reasoning. Many of these problems are both textual and multimodal. Given the inputs in different modalities, a model generates a rationale and then uses it to answer a question. Because of the hallucination issue, the generated soft negative rationales with high textual quality but illogical semantics do not always help improve answer accuracy. This study proposes a rationale generation method using soft negative sampling (SNSE-CoT) to mitigate hallucinations in multimodal CoT. Five methods were applied to generate soft negative samples that shared highly similar text but had different semantics from the original. Bidirectional margin loss (BML) was applied to introduce them into the traditional contrastive learning framework that involves only positive and negative samples. Extensive experiments on the ScienceQA dataset demonstrated the effectiveness of the proposed method. Code and data are released at https://github.com/zgMin/SNSE-CoT.
%U https://aclanthology.org/2024.lrec-main.537
%P 6059-6076
Markdown (Informal)
[Enhancing Semantics in Multimodal Chain of Thought via Soft Negative Sampling](https://aclanthology.org/2024.lrec-main.537) (Zheng et al., LREC-COLING 2024)
ACL