@inproceedings{martynov-etal-2024-way,
title = "On the Way to Lossless Compression of Language Transformers: Exploring Cross-Domain Properties of Quantization",
author = "Martynov, Nikita and
Goncharov, Aleksei and
Kumichev, Gleb and
Egorov, Evgeniy and
Pavlov, Stanislav Vladimirovich and
Durinov, Mikhail Sergeevich and
Zuev, Aleksandr Sergeevich and
Filimonov, Egor Anatolievich",
editor = "Calzolari, Nicoletta and
Kan, Min-Yen and
Hoste, Veronique and
Lenci, Alessandro and
Sakti, Sakriani and
Xue, Nianwen",
booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.lrec-main.1089",
pages = "12435--12442",
abstract = "Modern Transformers achieved impressive results on various Natural Language Processing tasks over the last few years. The one downside of this success is the size of these models. Huge capacity, which sometimes surpasses billions of parameters, improves generalization abilities, but makes it difficult to employ. Developing field of model compression seeks to reduce the model size and inference latency. This research focuses on one of the compression techniques {---} Post-Training Quantization. We present a methodology to effectively quantize at least 95{\%} of Transformer weights and corresponding activations to INT8 without any access to task-specific data so the drop in performance does not exceed 0.02{\%}. Furthermore, we provide intriguing observations that reflect cross-domain nature of some of the quantization properties.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="martynov-etal-2024-way">
<titleInfo>
<title>On the Way to Lossless Compression of Language Transformers: Exploring Cross-Domain Properties of Quantization</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nikita</namePart>
<namePart type="family">Martynov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aleksei</namePart>
<namePart type="family">Goncharov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gleb</namePart>
<namePart type="family">Kumichev</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Evgeniy</namePart>
<namePart type="family">Egorov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stanislav</namePart>
<namePart type="given">Vladimirovich</namePart>
<namePart type="family">Pavlov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mikhail</namePart>
<namePart type="given">Sergeevich</namePart>
<namePart type="family">Durinov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aleksandr</namePart>
<namePart type="given">Sergeevich</namePart>
<namePart type="family">Zuev</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Egor</namePart>
<namePart type="given">Anatolievich</namePart>
<namePart type="family">Filimonov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronique</namePart>
<namePart type="family">Hoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Modern Transformers have achieved impressive results on various Natural Language Processing tasks over the last few years. One downside of this success is the size of these models. Their huge capacity, which sometimes surpasses billions of parameters, improves generalization abilities but makes the models difficult to deploy. The developing field of model compression seeks to reduce model size and inference latency. This research focuses on one such compression technique, Post-Training Quantization. We present a methodology to effectively quantize at least 95% of Transformer weights and the corresponding activations to INT8 without any access to task-specific data, so that the drop in performance does not exceed 0.02%. Furthermore, we provide intriguing observations that reflect the cross-domain nature of some quantization properties.</abstract>
<identifier type="citekey">martynov-etal-2024-way</identifier>
<location>
<url>https://aclanthology.org/2024.lrec-main.1089</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>12435</start>
<end>12442</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T On the Way to Lossless Compression of Language Transformers: Exploring Cross-Domain Properties of Quantization
%A Martynov, Nikita
%A Goncharov, Aleksei
%A Kumichev, Gleb
%A Egorov, Evgeniy
%A Pavlov, Stanislav Vladimirovich
%A Durinov, Mikhail Sergeevich
%A Zuev, Aleksandr Sergeevich
%A Filimonov, Egor Anatolievich
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F martynov-etal-2024-way
%X Modern Transformers have achieved impressive results on various Natural Language Processing tasks over the last few years. One downside of this success is the size of these models. Their huge capacity, which sometimes surpasses billions of parameters, improves generalization abilities but makes the models difficult to deploy. The developing field of model compression seeks to reduce model size and inference latency. This research focuses on one such compression technique, Post-Training Quantization. We present a methodology to effectively quantize at least 95% of Transformer weights and the corresponding activations to INT8 without any access to task-specific data, so that the drop in performance does not exceed 0.02%. Furthermore, we provide intriguing observations that reflect the cross-domain nature of some quantization properties.
%U https://aclanthology.org/2024.lrec-main.1089
%P 12435-12442
Markdown (Informal)
[On the Way to Lossless Compression of Language Transformers: Exploring Cross-Domain Properties of Quantization](https://aclanthology.org/2024.lrec-main.1089) (Martynov et al., LREC-COLING 2024)
ACL
- Nikita Martynov, Aleksei Goncharov, Gleb Kumichev, Evgeniy Egorov, Stanislav Vladimirovich Pavlov, Mikhail Sergeevich Durinov, Aleksandr Sergeevich Zuev, and Egor Anatolievich Filimonov. 2024. On the Way to Lossless Compression of Language Transformers: Exploring Cross-Domain Properties of Quantization. In Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024), pages 12435–12442, Torino, Italia. ELRA and ICCL.
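
For a concrete picture of the technique the cited paper studies, the snippet below is a minimal, generic sketch of symmetric per-tensor INT8 post-training quantization in PyTorch. It is not the authors' methodology as published; the tensor shape, the per-tensor scale, and the round-and-clamp scheme are illustrative assumptions only.

```python
# Generic illustration of symmetric per-tensor INT8 post-training quantization.
# Not the authors' method; layer choice and calibration are placeholders.
import torch

def quantize_int8(x: torch.Tensor):
    """Map a float tensor to INT8 with a single symmetric scale."""
    scale = x.abs().max() / 127.0                       # largest magnitude -> 127
    q = torch.clamp(torch.round(x / scale), -127, 127).to(torch.int8)
    return q, scale

def dequantize(q: torch.Tensor, scale: torch.Tensor) -> torch.Tensor:
    """Recover an approximate float tensor from its INT8 representation."""
    return q.to(torch.float32) * scale

if __name__ == "__main__":
    w = torch.randn(768, 768)                           # e.g. a Transformer projection weight
    q, s = quantize_int8(w)
    w_hat = dequantize(q, s)
    print("mean abs reconstruction error:", (w - w_hat).abs().mean().item())
```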