@inproceedings{wang-etal-2026-compressed,
title = "Through a Compressed Lens: Investigating The Impact of Quantization on Factual Knowledge Recall",
author = {Wang, Qianli and
Wang, Mingyang and
Feldhus, Nils and
Ostermann, Simon and
Cao, Yuan and
Schuetze, Hinrich and
M{\"o}ller, Sebastian and
Schmitt, Vera},
editor = "Chang, Kai-Wei and
Mehrabi, Ninareh and
Krishna, Satyapriya and
Das, Anubrata and
Dhamala, Jwala and
Cao, Yang Trista and
Kumarage, Tharindu and
Ramakrishna, Anil and
Christodoulopoulos, Christos and
Wan, Yixin and
Galystan, Aram and
Kumar, Anoop and
Gupta, Rahul",
booktitle = "Proceedings of the 6th Workshop on Trustworthy {NLP} ({T}rust{NLP} 2026)",
month = jul,
year = "2026",
address = "San Diego, California",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.trustnlp-main.2/",
pages = "21--39",
ISBN = "979-8-89176-418-7",
abstract = "Quantization methods are widely used to accelerate inference and streamline the deployment of large language models (LLMs). Although quantization{'}s effects on various LLM capabilities have been extensively studied, one critical area remains underexplored: factual knowledge recall (FKR), the process by which LLMs access stored knowledge. To this end, we conduct comprehensive experiments using three common quantization techniques at distinct bit widths, in conjunction with interpretability-driven analyses on two tasks, knowledge memorization and latent multi-hop reasoning. We show that quantization typically results in information loss within LLMs, consequently diminishing their capacity for FKR. This effect is particularly amplified in smaller models within the same architectural families. However, models quantized at reduced bit precision do not consistently exhibit inferior performance and occasionally quantization may even enhance model FKR. We find that BitSandBytes demonstrates highest preservation of the original full-precision model{'}s FKR. Despite variability across models and methods, quantization causes modest performance degradation and remains an effective compression strategy."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wang-etal-2026-compressed">
<titleInfo>
<title>Through a Compressed Lens: Investigating The Impact of Quantization on Factual Knowledge Recall</title>
</titleInfo>
<name type="personal">
<namePart type="given">Qianli</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mingyang</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nils</namePart>
<namePart type="family">Feldhus</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Simon</namePart>
<namePart type="family">Ostermann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuan</namePart>
<namePart type="family">Cao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hinrich</namePart>
<namePart type="family">Schuetze</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastian</namePart>
<namePart type="family">Möller</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vera</namePart>
<namePart type="family">Schmitt</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 6th Workshop on Trustworthy NLP (TrustNLP 2026)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kai-Wei</namePart>
<namePart type="family">Chang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ninareh</namePart>
<namePart type="family">Mehrabi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Satyapriya</namePart>
<namePart type="family">Krishna</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anubrata</namePart>
<namePart type="family">Das</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jwala</namePart>
<namePart type="family">Dhamala</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="given">Trista</namePart>
<namePart type="family">Cao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tharindu</namePart>
<namePart type="family">Kumarage</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anil</namePart>
<namePart type="family">Ramakrishna</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christos</namePart>
<namePart type="family">Christodoulopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yixin</namePart>
<namePart type="family">Wan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aram</namePart>
<namePart type="family">Galystan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anoop</namePart>
<namePart type="family">Kumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rahul</namePart>
<namePart type="family">Gupta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-418-7</identifier>
</relatedItem>
<abstract>Quantization methods are widely used to accelerate inference and streamline the deployment of large language models (LLMs). Although quantization’s effects on various LLM capabilities have been extensively studied, one critical area remains underexplored: factual knowledge recall (FKR), the process by which LLMs access stored knowledge. To this end, we conduct comprehensive experiments using three common quantization techniques at distinct bit widths, in conjunction with interpretability-driven analyses on two tasks, knowledge memorization and latent multi-hop reasoning. We show that quantization typically results in information loss within LLMs, consequently diminishing their capacity for FKR. This effect is particularly amplified in smaller models within the same architectural families. However, models quantized at reduced bit precision do not consistently exhibit inferior performance and occasionally quantization may even enhance model FKR. We find that BitSandBytes demonstrates highest preservation of the original full-precision model’s FKR. Despite variability across models and methods, quantization causes modest performance degradation and remains an effective compression strategy.</abstract>
<identifier type="citekey">wang-etal-2026-compressed</identifier>
<location>
<url>https://aclanthology.org/2026.trustnlp-main.2/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>21</start>
<end>39</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Through a Compressed Lens: Investigating The Impact of Quantization on Factual Knowledge Recall
%A Wang, Qianli
%A Wang, Mingyang
%A Feldhus, Nils
%A Ostermann, Simon
%A Cao, Yuan
%A Schuetze, Hinrich
%A Möller, Sebastian
%A Schmitt, Vera
%Y Chang, Kai-Wei
%Y Mehrabi, Ninareh
%Y Krishna, Satyapriya
%Y Das, Anubrata
%Y Dhamala, Jwala
%Y Cao, Yang Trista
%Y Kumarage, Tharindu
%Y Ramakrishna, Anil
%Y Christodoulopoulos, Christos
%Y Wan, Yixin
%Y Galystan, Aram
%Y Kumar, Anoop
%Y Gupta, Rahul
%S Proceedings of the 6th Workshop on Trustworthy NLP (TrustNLP 2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California
%@ 979-8-89176-418-7
%F wang-etal-2026-compressed
%X Quantization methods are widely used to accelerate inference and streamline the deployment of large language models (LLMs). Although quantization’s effects on various LLM capabilities have been extensively studied, one critical area remains underexplored: factual knowledge recall (FKR), the process by which LLMs access stored knowledge. To this end, we conduct comprehensive experiments using three common quantization techniques at distinct bit widths, in conjunction with interpretability-driven analyses on two tasks, knowledge memorization and latent multi-hop reasoning. We show that quantization typically results in information loss within LLMs, consequently diminishing their capacity for FKR. This effect is particularly amplified in smaller models within the same architectural families. However, models quantized at reduced bit precision do not consistently exhibit inferior performance and occasionally quantization may even enhance model FKR. We find that BitSandBytes demonstrates highest preservation of the original full-precision model’s FKR. Despite variability across models and methods, quantization causes modest performance degradation and remains an effective compression strategy.
%U https://aclanthology.org/2026.trustnlp-main.2/
%P 21-39
Markdown (Informal)
[Through a Compressed Lens: Investigating The Impact of Quantization on Factual Knowledge Recall](https://aclanthology.org/2026.trustnlp-main.2/) (Wang et al., TrustNLP 2026)
ACL
- Qianli Wang, Mingyang Wang, Nils Feldhus, Simon Ostermann, Yuan Cao, Hinrich Schuetze, Sebastian Möller, and Vera Schmitt. 2026. Through a Compressed Lens: Investigating The Impact of Quantization on Factual Knowledge Recall. In Proceedings of the 6th Workshop on Trustworthy NLP (TrustNLP 2026), pages 21–39, San Diego, California. Association for Computational Linguistics.