% Conference-paper record (citation key: zeng-etal-2026-cogevolve).
% Words whose capitalisation must survive sentence-casing bibliography styles
% are brace-protected as whole words rather than letter by letter.
@inproceedings{zeng-etal-2026-cogevolve,
  title     = {{CogEvolve}: A Multimodal Benchmark for Evaluating Relational Reasoning in Semantic Extension},
  author    = {Zeng, Jingjie and
               Li, Huayang and
               Yang, Liang and
               Sun, Yuanyuan and
               Zhang, Shaowu and
               Lin, Hongfei},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} (Volume 1: Long Papers)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.acl-long.1190/},
  pages     = {25943--25960},
  isbn      = {979-8-89176-390-6},
  abstract  = {Human cognition excels at extending knowledge through analogy, where word meanings evolve along structured pathways from concrete prototypes to abstract senses via metaphor and metonymy. Do Large Language Models (LLMs) internalize this generative logic, or merely mimic statistical patterns? To investigate this, we introduce CogEvolve, a cognitive linguistic benchmark designed to test these evolutionary pathways across textual and visual modalities. Our evaluation reveals a distinct cognitive profile: models function as ``Super-Associators'' expert at static recognition yet fail at causal reasoning. In text, they exhibit a Frequency-Primacy Conflation, confusing statistical prevalence with cognitive basicness. Crucially, this reasoning collapses further in the visual domain. We term this deficit the Ungrounded Arrow: models possess high-fidelity concept representations (the ``dots'') but lack the transformational operators (the ``arrows'') essential for true relational understanding.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record; its ID matches the citekey identifier further down. -->
  <mods ID="zeng-etal-2026-cogevolve">
    <titleInfo>
      <title>CogEvolve: A Multimodal Benchmark for Evaluating Relational Reasoning in Semantic Extension</title>
    </titleInfo>

    <!-- Authors of the paper, in listed order. -->
    <name type="personal">
      <namePart type="given">Jingjie</namePart>
      <namePart type="family">Zeng</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Huayang</namePart>
      <namePart type="family">Li</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Liang</namePart>
      <namePart type="family">Yang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yuanyuan</namePart>
      <namePart type="family">Sun</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Shaowu</namePart>
      <namePart type="family">Zhang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Hongfei</namePart>
      <namePart type="family">Lin</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>

    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume, with its editors, publisher, place and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Maria</namePart>
        <namePart type="family">Liakata</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Viviane</namePart>
        <namePart type="given">P</namePart>
        <namePart type="family">Moreira</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jiajun</namePart>
        <namePart type="family">Zhang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">David</namePart>
        <namePart type="family">Jurgens</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, California, United States</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-390-6</identifier>
    </relatedItem>

    <abstract>Human cognition excels at extending knowledge through analogy, where word meanings evolve along structured pathways from concrete prototypes to abstract senses via metaphor and metonymy. Do Large Language Models (LLMs) internalize this generative logic, or merely mimic statistical patterns? To investigate this, we introduce CogEvolve, a cognitive linguistic benchmark designed to test these evolutionary pathways across textual and visual modalities. Our evaluation reveals a distinct cognitive profile: models function as “Super-Associators” expert at static recognition yet fail at causal reasoning. In text, they exhibit a Frequency-Primacy Conflation, confusing statistical prevalence with cognitive basicness. Crucially, this reasoning collapses further in the visual domain. We term this deficit the Ungrounded Arrow: models possess high-fidelity concept representations (the “dots”) but lack the transformational operators (the “arrows”) essential for true relational understanding.</abstract>

    <!-- Citation key and landing-page URL for this record. -->
    <identifier type="citekey">zeng-etal-2026-cogevolve</identifier>
    <location>
      <url>https://aclanthology.org/2026.acl-long.1190/</url>
    </location>

    <!-- Position of the paper within the host volume: issue date and page range. -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>25943</start>
        <end>25960</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T CogEvolve: A Multimodal Benchmark for Evaluating Relational Reasoning in Semantic Extension
%A Zeng, Jingjie
%A Li, Huayang
%A Yang, Liang
%A Sun, Yuanyuan
%A Zhang, Shaowu
%A Lin, Hongfei
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-390-6
%F zeng-etal-2026-cogevolve
%X Human cognition excels at extending knowledge through analogy, where word meanings evolve along structured pathways from concrete prototypes to abstract senses via metaphor and metonymy. Do Large Language Models (LLMs) internalize this generative logic, or merely mimic statistical patterns? To investigate this, we introduce CogEvolve, a cognitive linguistic benchmark designed to test these evolutionary pathways across textual and visual modalities. Our evaluation reveals a distinct cognitive profile: models function as “Super-Associators” expert at static recognition yet fail at causal reasoning. In text, they exhibit a Frequency-Primacy Conflation, confusing statistical prevalence with cognitive basicness. Crucially, this reasoning collapses further in the visual domain. We term this deficit the Ungrounded Arrow: models possess high-fidelity concept representations (the “dots”) but lack the transformational operators (the “arrows”) essential for true relational understanding.
%U https://aclanthology.org/2026.acl-long.1190/
%P 25943-25960
Markdown (Informal)
[CogEvolve: A Multimodal Benchmark for Evaluating Relational Reasoning in Semantic Extension](https://aclanthology.org/2026.acl-long.1190/) (Zeng et al., ACL 2026)
ACL