@inproceedings{park-etal-2025-unifying,
title = "Unifying Uniform and Binary-coding Quantization for Accurate Compression of Large Language Models",
author = "Park, Seungcheol and
Bae, Jeongin and
Kwon, Beomseok and
Kim, Minjun and
Kim, Byeongwook and
Kwon, Se Jung and
Kang, U and
Lee, Dongsoo",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.acl-long.1382/",
doi = "10.18653/v1/2025.acl-long.1382",
pages = "28468--28488",
ISBN = "979-8-89176-251-0",
abstract = "How can we quantize large language models while preserving accuracy? Quantization is essential for deploying large language models (LLMs) efficiently. Binary-coding quantization (BCQ) and uniform quantization (UQ) are promising quantization schemes that have strong expressiveness and optimizability, respectively. However, neither scheme leverages both advantages. In this paper, we propose UniQuan$_F$ (Unified Quantization with Flexible Mapping), an accurate quantization method for LLMs. UniQuan$_F$ harnesses both strong expressiveness and optimizability by unifying the flexible mapping technique in UQ and BCQ{'}s non-uniform quantization levels. We propose unified initialization, and local and periodic mapping techniques to optimize the parameters in UniQuan$_F$ precisely. After optimization, our unification theorem removes computational and memory overhead, allowing us to utilize the superior accuracy of UniQuan$_F$ without extra deployment costs induced by the unification. Experimental results demonstrate that UniQuan$_F$ outperforms existing UQ and BCQ methods, achieving up to 4.60{\%} higher accuracy on the GSM8K benchmark."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="park-etal-2025-unifying">
<titleInfo>
<title>Unifying Uniform and Binary-coding Quantization for Accurate Compression of Large Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Seungcheol</namePart>
<namePart type="family">Park</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jeongin</namePart>
<namePart type="family">Bae</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Beomseok</namePart>
<namePart type="family">Kwon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Minjun</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Byeongwook</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Se</namePart>
<namePart type="given">Jung</namePart>
<namePart type="family">Kwon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">U</namePart>
<namePart type="family">Kang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dongsoo</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Nabende</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Taher</namePart>
<namePart type="family">Pilehvar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-251-0</identifier>
</relatedItem>
<abstract>How can we quantize large language models while preserving accuracy? Quantization is essential for deploying large language models (LLMs) efficiently. Binary-coding quantization (BCQ) and uniform quantization (UQ) are promising quantization schemes that have strong expressiveness and optimizability, respectively. However, neither scheme leverages both advantages. In this paper, we propose UniQuan_F (Unified Quantization with Flexible Mapping), an accurate quantization method for LLMs. UniQuan_F harnesses both strong expressiveness and optimizability by unifying the flexible mapping technique in UQ and BCQ’s non-uniform quantization levels. We propose unified initialization, and local and periodic mapping techniques to optimize the parameters in UniQuan_F precisely. After optimization, our unification theorem removes computational and memory overhead, allowing us to utilize the superior accuracy of UniQuan_F without extra deployment costs induced by the unification. Experimental results demonstrate that UniQuan_F outperforms existing UQ and BCQ methods, achieving up to 4.60% higher accuracy on the GSM8K benchmark.</abstract>
<identifier type="citekey">park-etal-2025-unifying</identifier>
<identifier type="doi">10.18653/v1/2025.acl-long.1382</identifier>
<location>
<url>https://aclanthology.org/2025.acl-long.1382/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>28468</start>
<end>28488</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Unifying Uniform and Binary-coding Quantization for Accurate Compression of Large Language Models
%A Park, Seungcheol
%A Bae, Jeongin
%A Kwon, Beomseok
%A Kim, Minjun
%A Kim, Byeongwook
%A Kwon, Se Jung
%A Kang, U
%A Lee, Dongsoo
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-251-0
%F park-etal-2025-unifying
%X How can we quantize large language models while preserving accuracy? Quantization is essential for deploying large language models (LLMs) efficiently. Binary-coding quantization (BCQ) and uniform quantization (UQ) are promising quantization schemes that have strong expressiveness and optimizability, respectively. However, neither scheme leverages both advantages. In this paper, we propose UniQuan_F (Unified Quantization with Flexible Mapping), an accurate quantization method for LLMs. UniQuan_F harnesses both strong expressiveness and optimizability by unifying the flexible mapping technique in UQ and BCQ’s non-uniform quantization levels. We propose unified initialization, and local and periodic mapping techniques to optimize the parameters in UniQuan_F precisely. After optimization, our unification theorem removes computational and memory overhead, allowing us to utilize the superior accuracy of UniQuan_F without extra deployment costs induced by the unification. Experimental results demonstrate that UniQuan_F outperforms existing UQ and BCQ methods, achieving up to 4.60% higher accuracy on the GSM8K benchmark.
%R 10.18653/v1/2025.acl-long.1382
%U https://aclanthology.org/2025.acl-long.1382/
%U https://doi.org/10.18653/v1/2025.acl-long.1382
%P 28468-28488
Markdown (Informal)
[Unifying Uniform and Binary-coding Quantization for Accurate Compression of Large Language Models](https://aclanthology.org/2025.acl-long.1382/) (Park et al., ACL 2025)
ACL
- Seungcheol Park, Jeongin Bae, Beomseok Kwon, Minjun Kim, Byeongwook Kim, Se Jung Kwon, U Kang, and Dongsoo Lee. 2025. Unifying Uniform and Binary-coding Quantization for Accurate Compression of Large Language Models. In Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 28468–28488, Vienna, Austria. Association for Computational Linguistics.
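
As background for the abstract above, the sketch below contrasts the two standard weight-quantization forms it refers to: uniform quantization (evenly spaced levels, w ≈ s·q + z with integer codes q) and binary-coding quantization (non-uniform levels, w ≈ Σᵢ αᵢbᵢ with bᵢ ∈ {−1, +1}). This is a minimal illustration of those textbook schemes only, not an implementation of the paper's UniQuanF method; the function names and the greedy residual fit used for BCQ are illustrative assumptions.

```python
import numpy as np

def uniform_quantize(w, bits=3):
    """Uniform quantization (UQ): evenly spaced levels, w ~ s*q + z."""
    qmax = 2 ** bits - 1
    s = max((w.max() - w.min()) / qmax, 1e-12)   # step size (scale)
    z = w.min()                                   # zero offset
    q = np.clip(np.round((w - z) / s), 0, qmax)   # integer codes
    return s * q + z                              # dequantized weights

def bcq_quantize(w, bits=3):
    """Binary-coding quantization (BCQ): w ~ sum_i alpha_i * b_i, b_i in {-1,+1}.
    Greedy residual fit: binarize the residual, then pick the least-squares
    optimal scale alpha = mean(|residual|) for that binary code."""
    residual = w.copy()
    approx = np.zeros_like(w)
    for _ in range(bits):
        b = np.sign(residual)
        b[b == 0] = 1.0
        alpha = np.abs(residual).mean()
        approx += alpha * b
        residual -= alpha * b
    return approx

if __name__ == "__main__":
    rng = np.random.default_rng(0)
    w = rng.normal(size=1024).astype(np.float32)  # toy weight vector
    for name, fn in [("UQ", uniform_quantize), ("BCQ", bcq_quantize)]:
        err = np.mean((w - fn(w)) ** 2)
        print(f"{name:3s} 3-bit reconstruction MSE: {err:.5f}")
```

The paper's point, per the abstract, is that UQ's strength lies in how values are mapped onto its grid (flexible mapping) while BCQ's strength lies in its non-uniform levels; UniQuanF unifies the two, which neither function above does on its own.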