@inproceedings{sun-etal-2026-task,
title = "Task Matters: Knowledge Requirements Shape {LLM} Responses to Context{--}Memory Conflict",
author = "Sun, Kaiser and
Bai, Fan and
Dredze, Mark",
editor = "Liakata, Maria and
Moreira, Viviane P. and
Zhang, Jiajun and
Jurgens, David",
booktitle = "Findings of the {A}ssociation for {C}omputational {L}inguistics: {ACL} 2026",
month = jul,
year = "2026",
address = "San Diego, California, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.findings-acl.202/",
pages = "4154--4176",
ISBN = "979-8-89176-395-1",
abstract = "Large language models (LLMs) rely on both contextual knowledge and parametric memory, yet these sources can conflict. Prior analysis largely focused on contextual question answering, suggesting that models tend to favor parametric knowledge under conflict, but this setting assumes that tasks should always rely on the provided passage. It therefore remains unclear how LLMs behave when \textit{tasks demand different kinds and degrees of knowledge utilization}. We address this gap with a model-agnostic diagnostic framework that holds underlying knowledge constant while injecting controlled conflicts across tasks with varying knowledge requirements. Evaluating representative open-source LLMs, we find that: (1) performance degradation under conflict correlates with a task{'}s knowledge reliance rather than conflict plausibility alone; (2) strategies such as explanatory rationales or reiteration increase context reliance, helping context-only tasks but harming those that require parametric knowledge; and (3) these behaviors bias model-based evaluation, raising concerns about the reliability of LLMs as judges. Together, our findings show that context{--}memory conflict is fundamentally task-dependent and motivate task-aware approaches to balancing context and memory in LLM deployment and evaluation."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sun-etal-2026-task">
<titleInfo>
<title>Task Matters: Knowledge Requirements Shape LLM Responses to Context–Memory Conflict</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kaiser</namePart>
<namePart type="family">Sun</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fan</namePart>
<namePart type="family">Bai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mark</namePart>
<namePart type="family">Dredze</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2026</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Liakata</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Viviane</namePart>
<namePart type="given">P</namePart>
<namePart type="family">Moreira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiajun</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Jurgens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-395-1</identifier>
</relatedItem>
<abstract>Large language models (LLMs) rely on both contextual knowledge and parametric memory, yet these sources can conflict. Prior analysis largely focused on contextual question answering, suggesting that models tend to favor parametric knowledge under conflict, but this setting assumes that tasks should always rely on the provided passage. It therefore remains unclear how LLMs behave when tasks demand different kinds and degrees of knowledge utilization. We address this gap with a model-agnostic diagnostic framework that holds underlying knowledge constant while injecting controlled conflicts across tasks with varying knowledge requirements. Evaluating representative open-source LLMs, we find that: (1) performance degradation under conflict correlates with a task’s knowledge reliance rather than conflict plausibility alone; (2) strategies such as explanatory rationales or reiteration increase context reliance, helping context-only tasks but harming those that require parametric knowledge; and (3) these behaviors bias model-based evaluation, raising concerns about the reliability of LLMs as judges. Together, our findings show that context–memory conflict is fundamentally task-dependent and motivate task-aware approaches to balancing context and memory in LLM deployment and evaluation.</abstract>
<identifier type="citekey">sun-etal-2026-task</identifier>
<location>
<url>https://aclanthology.org/2026.findings-acl.202/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>4154</start>
<end>4176</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Task Matters: Knowledge Requirements Shape LLM Responses to Context–Memory Conflict
%A Sun, Kaiser
%A Bai, Fan
%A Dredze, Mark
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Findings of the Association for Computational Linguistics: ACL 2026
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-395-1
%F sun-etal-2026-task
%X Large language models (LLMs) rely on both contextual knowledge and parametric memory, yet these sources can conflict. Prior analysis largely focused on contextual question answering, suggesting that models tend to favor parametric knowledge under conflict, but this setting assumes that tasks should always rely on the provided passage. It therefore remains unclear how LLMs behave when tasks demand different kinds and degrees of knowledge utilization. We address this gap with a model-agnostic diagnostic framework that holds underlying knowledge constant while injecting controlled conflicts across tasks with varying knowledge requirements. Evaluating representative open-source LLMs, we find that: (1) performance degradation under conflict correlates with a task’s knowledge reliance rather than conflict plausibility alone; (2) strategies such as explanatory rationales or reiteration increase context reliance, helping context-only tasks but harming those that require parametric knowledge; and (3) these behaviors bias model-based evaluation, raising concerns about the reliability of LLMs as judges. Together, our findings show that context–memory conflict is fundamentally task-dependent and motivate task-aware approaches to balancing context and memory in LLM deployment and evaluation.
%U https://aclanthology.org/2026.findings-acl.202/
%P 4154-4176
Markdown (Informal)
[Task Matters: Knowledge Requirements Shape LLM Responses to Context–Memory Conflict](https://aclanthology.org/2026.findings-acl.202/) (Sun et al., Findings 2026)
ACL