@inproceedings{zhang-etal-2024-gome,
title = "{GOME}: Grounding-based Metaphor Binding With Conceptual Elaboration For Figurative Language Illustration",
author = "Zhang, Linhao and
Liu, Jintao and
Jin, Li and
Wang, Hao and
Wei, Kaiwen and
Xu, Guangluan",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.emnlp-main.1028",
pages = "18500--18510",
abstract = "The illustration or visualization of figurative language, such as linguistic metaphors, is an emerging challenge for existing Large Language Models (LLMs) and multimodal models. Due to their comparison of seemingly unrelated concepts in metaphors, existing LLMs have a tendency of over-literalization, which illustrates figurative language solely based on literal objects, ignoring the underlying groundings and associations across disparate metaphorical domains. Furthermore, prior approaches have ignored the binding process between visual objects and metaphorical attributes, which further intensifies the infidelity of visual metaphors. To address the issues above, we propose GOME (Grounding-based Metaphor Binding), which illustrates linguistic metaphors from the grounding perspective elaborated through LLMs. GOME consists of two steps for metaphor illustration, including grounding-based elaboration and scenario visualization. In the elaboration step, metaphorical knowledge is integrated into systematic instructions for LLMs, which employs a CoT prompting method rooted in rhetoric. This approach specifies metaphorical devices such as vehicles and groundings, to ensure accurate and faithful descriptions consumed by text-to-image models. In the visualization step, an inference-time metaphor binding method is realized based on elaboration outputs, which register attentional control during the diffusion process, and captures the underlying attributes from the abstract metaphorical domain. Comprehensive evaluations using multiple downstream tasks confirm that, GOME is superior to isolated LLMs, diffusion models, or their direct collaboration.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhang-etal-2024-gome">
<titleInfo>
<title>GOME: Grounding-based Metaphor Binding With Conceptual Elaboration For Figurative Language Illustration</title>
</titleInfo>
<name type="personal">
<namePart type="given">Linhao</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jintao</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Li</namePart>
<namePart type="family">Jin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hao</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kaiwen</namePart>
<namePart type="family">Wei</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Guangluan</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaser</namePart>
<namePart type="family">Al-Onaizan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The illustration or visualization of figurative language, such as linguistic metaphors, is an emerging challenge for existing Large Language Models (LLMs) and multimodal models. Due to their comparison of seemingly unrelated concepts in metaphors, existing LLMs have a tendency of over-literalization, which illustrates figurative language solely based on literal objects, ignoring the underlying groundings and associations across disparate metaphorical domains. Furthermore, prior approaches have ignored the binding process between visual objects and metaphorical attributes, which further intensifies the infidelity of visual metaphors. To address the issues above, we propose GOME (Grounding-based Metaphor Binding), which illustrates linguistic metaphors from the grounding perspective elaborated through LLMs. GOME consists of two steps for metaphor illustration, including grounding-based elaboration and scenario visualization. In the elaboration step, metaphorical knowledge is integrated into systematic instructions for LLMs, which employs a CoT prompting method rooted in rhetoric. This approach specifies metaphorical devices such as vehicles and groundings, to ensure accurate and faithful descriptions consumed by text-to-image models. In the visualization step, an inference-time metaphor binding method is realized based on elaboration outputs, which register attentional control during the diffusion process, and captures the underlying attributes from the abstract metaphorical domain. Comprehensive evaluations using multiple downstream tasks confirm that, GOME is superior to isolated LLMs, diffusion models, or their direct collaboration.</abstract>
<identifier type="citekey">zhang-etal-2024-gome</identifier>
<location>
<url>https://aclanthology.org/2024.emnlp-main.1028</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>18500</start>
<end>18510</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T GOME: Grounding-based Metaphor Binding With Conceptual Elaboration For Figurative Language Illustration
%A Zhang, Linhao
%A Liu, Jintao
%A Jin, Li
%A Wang, Hao
%A Wei, Kaiwen
%A Xu, Guangluan
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F zhang-etal-2024-gome
%X The illustration or visualization of figurative language, such as linguistic metaphors, is an emerging challenge for existing Large Language Models (LLMs) and multimodal models. Due to their comparison of seemingly unrelated concepts in metaphors, existing LLMs have a tendency of over-literalization, which illustrates figurative language solely based on literal objects, ignoring the underlying groundings and associations across disparate metaphorical domains. Furthermore, prior approaches have ignored the binding process between visual objects and metaphorical attributes, which further intensifies the infidelity of visual metaphors. To address the issues above, we propose GOME (Grounding-based Metaphor Binding), which illustrates linguistic metaphors from the grounding perspective elaborated through LLMs. GOME consists of two steps for metaphor illustration, including grounding-based elaboration and scenario visualization. In the elaboration step, metaphorical knowledge is integrated into systematic instructions for LLMs, which employs a CoT prompting method rooted in rhetoric. This approach specifies metaphorical devices such as vehicles and groundings, to ensure accurate and faithful descriptions consumed by text-to-image models. In the visualization step, an inference-time metaphor binding method is realized based on elaboration outputs, which register attentional control during the diffusion process, and captures the underlying attributes from the abstract metaphorical domain. Comprehensive evaluations using multiple downstream tasks confirm that, GOME is superior to isolated LLMs, diffusion models, or their direct collaboration.
%U https://aclanthology.org/2024.emnlp-main.1028
%P 18500-18510
Markdown (Informal)
[GOME: Grounding-based Metaphor Binding With Conceptual Elaboration For Figurative Language Illustration](https://aclanthology.org/2024.emnlp-main.1028) (Zhang et al., EMNLP 2024)
ACL