@inproceedings{cheng-etal-2025-compke,
title = "{COMPKE}: Complex Question Answering under Knowledge Editing",
author = "Cheng, Keyuan and
Kan, Zijian and
Zhang, Zhuoran and
Ali, Muhammad Asif and
Hu, Lijie and
Wang, Di",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2025",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-acl.130/",
doi = "10.18653/v1/2025.findings-acl.130",
pages = "2557--2576",
ISBN = "979-8-89176-256-5",
abstract = "Knowledge Editing-Efficiently modifying the knowledge in large language models has gathered great attention. Current benchmarks primarily use multi-hop question answering to assess and analyze newly injected or updated knowledge. However, we argue that these benchmarks fail to effectively evaluate how well the updated models apply this knowledge in real-life scenarios, particularly when questions require complex reasoning involving one-to-many relationships or multi-step logical intersections. To fill in this gap, we introduce a new benchmark, COMPKE: Complex Question Answering under Knowledge Editing, which includes 11,924 complex questions that reflect real-life situations. We perform a comprehensive evaluation of four different knowledge editing methods in COMPKE, and our results show that the performance of these methods varies between different models. For example, MeLLo achieves an accuracy of 39.47 on GPT-4o-mini but drops significantly to 3.83 on Qwen2.5-3B. We further analyze the reasons behind these results from both methodological and model perspectives. Our dataset will be publicly available on GitHub."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="cheng-etal-2025-compke">
<titleInfo>
<title>COMPKE: Complex Question Answering under Knowledge Editing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Keyuan</namePart>
<namePart type="family">Cheng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zijian</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhuoran</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Muhammad</namePart>
<namePart type="given">Asif</namePart>
<namePart type="family">Ali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lijie</namePart>
<namePart type="family">Hu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Di</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2025</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Nabende</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Taher</namePart>
<namePart type="family">Pilehvar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-256-5</identifier>
</relatedItem>
<abstract>Knowledge Editing-Efficiently modifying the knowledge in large language models has gathered great attention. Current benchmarks primarily use multi-hop question answering to assess and analyze newly injected or updated knowledge. However, we argue that these benchmarks fail to effectively evaluate how well the updated models apply this knowledge in real-life scenarios, particularly when questions require complex reasoning involving one-to-many relationships or multi-step logical intersections. To fill in this gap, we introduce a new benchmark, COMPKE: Complex Question Answering under Knowledge Editing, which includes 11,924 complex questions that reflect real-life situations. We perform a comprehensive evaluation of four different knowledge editing methods in COMPKE, and our results show that the performance of these methods varies between different models. For example, MeLLo achieves an accuracy of 39.47 on GPT-4o-mini but drops significantly to 3.83 on Qwen2.5-3B. We further analyze the reasons behind these results from both methodological and model perspectives. Our dataset will be publicly available on GitHub.</abstract>
<identifier type="citekey">cheng-etal-2025-compke</identifier>
<identifier type="doi">10.18653/v1/2025.findings-acl.130</identifier>
<location>
<url>https://aclanthology.org/2025.findings-acl.130/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>2557</start>
<end>2576</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T COMPKE: Complex Question Answering under Knowledge Editing
%A Cheng, Keyuan
%A Kan, Zijian
%A Zhang, Zhuoran
%A Ali, Muhammad Asif
%A Hu, Lijie
%A Wang, Di
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Findings of the Association for Computational Linguistics: ACL 2025
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-256-5
%F cheng-etal-2025-compke
%X Knowledge Editing-Efficiently modifying the knowledge in large language models has gathered great attention. Current benchmarks primarily use multi-hop question answering to assess and analyze newly injected or updated knowledge. However, we argue that these benchmarks fail to effectively evaluate how well the updated models apply this knowledge in real-life scenarios, particularly when questions require complex reasoning involving one-to-many relationships or multi-step logical intersections. To fill in this gap, we introduce a new benchmark, COMPKE: Complex Question Answering under Knowledge Editing, which includes 11,924 complex questions that reflect real-life situations. We perform a comprehensive evaluation of four different knowledge editing methods in COMPKE, and our results show that the performance of these methods varies between different models. For example, MeLLo achieves an accuracy of 39.47 on GPT-4o-mini but drops significantly to 3.83 on Qwen2.5-3B. We further analyze the reasons behind these results from both methodological and model perspectives. Our dataset will be publicly available on GitHub.
%R 10.18653/v1/2025.findings-acl.130
%U https://aclanthology.org/2025.findings-acl.130/
%U https://doi.org/10.18653/v1/2025.findings-acl.130
%P 2557-2576
Markdown (Informal)
[COMPKE: Complex Question Answering under Knowledge Editing](https://aclanthology.org/2025.findings-acl.130/) (Cheng et al., Findings 2025)
ACL
- Keyuan Cheng, Zijian Kan, Zhuoran Zhang, Muhammad Asif Ali, Lijie Hu, and Di Wang. 2025. COMPKE: Complex Question Answering under Knowledge Editing. In Findings of the Association for Computational Linguistics: ACL 2025, pages 2557–2576, Vienna, Austria. Association for Computational Linguistics.