% Findings of NAACL 2025 paper: "Language Modeling with Editable External Knowledge".
% Field values are LaTeX-encoded (e.g. {\%}), so emphasis inside the abstract is
% written with \textit / \textbf rather than Markdown asterisks, which LaTeX would
% typeset literally.
@inproceedings{li-etal-2025-language,
    title = "Language Modeling with Editable External Knowledge",
    author = "Li, Belinda Z. and
      Liu, Emmy and
      Ross, Alexis and
      Zeitoun, Abbas and
      Neubig, Graham and
      Andreas, Jacob",
    editor = "Chiruzzo, Luis and
      Ritter, Alan and
      Wang, Lu",
    booktitle = "Findings of the Association for Computational Linguistics: NAACL 2025",
    month = apr,
    year = "2025",
    address = "Albuquerque, New Mexico",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2025.findings-naacl.168/",
    doi = "10.18653/v1/2025.findings-naacl.168",
    pages = "3070--3090",
    ISBN = "979-8-89176-195-7",
    abstract = "When the world changes, so does the text that people write about it. How do we build language models that can be easily updated to reflect these changes? One popular approach is retrieval-augmented generation (RAG), in which new documents are inserted into a knowledge base and retrieved during prediction for downstream tasks. Most prior work on RAG has focused on improving model behavior during \textit{prediction} through better retrieval or reasoning. This paper introduces ERASE, which instead improves model behavior \textbf{when new documents are acquired}, by incrementally deleting or rewriting other entries in the knowledge base each time a document is added. In two new datasets evaluating models' ability to answer questions about a stream of news articles or conversations, ERASE improves accuracy relative to conventional retrieval-augmented generation by 7-13{\%} (Mixtral-8x7B) and 6-10{\%} (Llama-3-8B) absolute. This improvement is complementary to improved retrieval or reasoning for RAG: we demonstrate an 11{\%} improvement by applying ERASE to SelfRAG."
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the paper "Language Modeling with Editable External Knowledge". -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="li-etal-2025-language">
    <titleInfo>
      <title>Language Modeling with Editable External Knowledge</title>
    </titleInfo>

    <!-- Authors of the paper. -->
    <name type="personal">
      <namePart type="given">Belinda</namePart>
      <namePart type="given">Z</namePart>
      <namePart type="family">Li</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Emmy</namePart>
      <namePart type="family">Liu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Alexis</namePart>
      <namePart type="family">Ross</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Abbas</namePart>
      <namePart type="family">Zeitoun</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Graham</namePart>
      <namePart type="family">Neubig</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jacob</namePart>
      <namePart type="family">Andreas</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>

    <originInfo>
      <dateIssued>2025-04</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume, with its editors, publisher, place and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: NAACL 2025</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Luis</namePart>
        <namePart type="family">Chiruzzo</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Alan</namePart>
        <namePart type="family">Ritter</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Lu</namePart>
        <namePart type="family">Wang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Albuquerque, New Mexico</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-195-7</identifier>
    </relatedItem>

    <!-- The abstract is text content: it stays on a single line so no whitespace is added to the data. -->
    <abstract>When the world changes, so does the text that people write about it. How do we build language models that can be easily updated to reflect these changes? One popular approach is retrieval-augmented generation (RAG), in which new documents are inserted into a knowledge base and retrieved during prediction for downstream tasks. Most prior work on RAG has focused on improving model behavior during *prediction* through better retrieval or reasoning. This paper introduces ERASE, which instead improves model behavior **when new documents are acquired**, by incrementally deleting or rewriting other entries in the knowledge base each time a document is added. In two new datasets evaluating models’ ability to answer questions about a stream of news articles or conversations, ERASE improves accuracy relative to conventional retrieval-augmented generation by 7-13% (Mixtral-8x7B) and 6-10% (Llama-3-8B) absolute. This improvement is complementary to improved retrieval or reasoning for RAG: we demonstrate an 11% improvement by applying ERASE to SelfRAG.</abstract>

    <!-- Identifiers and landing page for the paper itself. -->
    <identifier type="citekey">li-etal-2025-language</identifier>
    <identifier type="doi">10.18653/v1/2025.findings-naacl.168</identifier>
    <location>
      <url>https://aclanthology.org/2025.findings-naacl.168/</url>
    </location>

    <!-- Page range of the paper within the host volume. -->
    <part>
      <date>2025-04</date>
      <extent unit="page">
        <start>3070</start>
        <end>3090</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Language Modeling with Editable External Knowledge
%A Li, Belinda Z.
%A Liu, Emmy
%A Ross, Alexis
%A Zeitoun, Abbas
%A Neubig, Graham
%A Andreas, Jacob
%Y Chiruzzo, Luis
%Y Ritter, Alan
%Y Wang, Lu
%S Findings of the Association for Computational Linguistics: NAACL 2025
%D 2025
%8 April
%I Association for Computational Linguistics
%C Albuquerque, New Mexico
%@ 979-8-89176-195-7
%F li-etal-2025-language
%X When the world changes, so does the text that people write about it. How do we build language models that can be easily updated to reflect these changes? One popular approach is retrieval-augmented generation (RAG), in which new documents are inserted into a knowledge base and retrieved during prediction for downstream tasks. Most prior work on RAG has focused on improving model behavior during *prediction* through better retrieval or reasoning. This paper introduces ERASE, which instead improves model behavior **when new documents are acquired**, by incrementally deleting or rewriting other entries in the knowledge base each time a document is added. In two new datasets evaluating models’ ability to answer questions about a stream of news articles or conversations, ERASE improves accuracy relative to conventional retrieval-augmented generation by 7-13% (Mixtral-8x7B) and 6-10% (Llama-3-8B) absolute. This improvement is complementary to improved retrieval or reasoning for RAG: we demonstrate an 11% improvement by applying ERASE to SelfRAG.
%R 10.18653/v1/2025.findings-naacl.168
%U https://aclanthology.org/2025.findings-naacl.168/
%U https://doi.org/10.18653/v1/2025.findings-naacl.168
%P 3070-3090
Markdown (Informal)
[Language Modeling with Editable External Knowledge](https://aclanthology.org/2025.findings-naacl.168/) (Li et al., Findings 2025)
ACL
- Belinda Z. Li, Emmy Liu, Alexis Ross, Abbas Zeitoun, Graham Neubig, and Jacob Andreas. 2025. Language Modeling with Editable External Knowledge. In Findings of the Association for Computational Linguistics: NAACL 2025, pages 3070–3090, Albuquerque, New Mexico. Association for Computational Linguistics.