@inproceedings{nguyen-etal-2025-langcompress,
title = "{L}ang{C}ompress: Language-Aware Compression of Large Language Models",
author = "Nguyen, Dieu-Hien and
Le, Nguyen-Khang and
Do, Truong Dinh and
Nguyen, Le-Minh",
editor = "Inui, Kentaro and
Sakti, Sakriani and
Wang, Haofen and
Wong, Derek F. and
Bhattacharyya, Pushpak and
Banerjee, Biplab and
Ekbal, Asif and
Chakraborty, Tanmoy and
Singh, Dhirendra Pratap",
booktitle = "Proceedings of the 14th International Joint Conference on Natural Language Processing and the 4th Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics",
month = dec,
year = "2025",
address = "Mumbai, India",
publisher = "The Asian Federation of Natural Language Processing and The Association for Computational Linguistics",
url = "https://aclanthology.org/2025.ijcnlp-long.112/",
pages = "2068--2077",
ISBN = "979-8-89176-298-5",
abstract = "Large Language Models (LLMs) demonstrate strong multilingual capabilities but are costly to deploy due to their size and computational demands. To mitigate this, compression techniques such as pruning and quantization are widely used. However, these methods face two key limitations: (1) they assume access to high-quality instruction or calibration data, which is often unavailable for low-resource languages; and (2) they aim to preserve multilingual generality, making them inefficient for language-specific applications. We introduce LangCompress, a language-aware compression framework that enhances existing compression methods for targeted deployment. LangCompress is method-agnostic and improves state-of-the-art pruning and quantization approaches. It features two core components: an iterative self-supervised pipeline for generating instruction data in the target language, and a vocabulary simplification strategy that reduces the LM head to focus on key tokens. Experiments on perplexity, translation, and summarization tasks show that LangCompress improves performance in the target language. The code and data are publicly available."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="nguyen-etal-2025-langcompress">
<titleInfo>
<title>LangCompress: Language-Aware Compression of Large Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dieu-Hien</namePart>
<namePart type="family">Nguyen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nguyen-Khang</namePart>
<namePart type="family">Le</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Truong</namePart>
<namePart type="given">Dinh</namePart>
<namePart type="family">Do</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Le-Minh</namePart>
<namePart type="family">Nguyen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 14th International Joint Conference on Natural Language Processing and the 4th Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kentaro</namePart>
<namePart type="family">Inui</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Haofen</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Derek</namePart>
<namePart type="given">F</namePart>
<namePart type="family">Wong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pushpak</namePart>
<namePart type="family">Bhattacharyya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Biplab</namePart>
<namePart type="family">Banerjee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asif</namePart>
<namePart type="family">Ekbal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tanmoy</namePart>
<namePart type="family">Chakraborty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dhirendra</namePart>
<namePart type="given">Pratap</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>The Asian Federation of Natural Language Processing and The Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mumbai, India</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-298-5</identifier>
</relatedItem>
<abstract>Large Language Models (LLMs) demonstrate strong multilingual capabilities but are costly to deploy due to their size and computational demands. To mitigate this, compression techniques such as pruning and quantization are widely used. However, these methods face two key limitations: (1) they assume access to high-quality instruction or calibration data, which is often unavailable for low-resource languages; and (2) they aim to preserve multilingual generality, making them inefficient for language-specific applications. We introduce LangCompress, a language-aware compression framework that enhances existing compression methods for targeted deployment. LangCompress is method-agnostic and improves state-of-the-art pruning and quantization approaches. It features two core components: an iterative self-supervised pipeline for generating instruction data in the target language, and a vocabulary simplification strategy that reduces the LM head to focus on key tokens. Experiments on perplexity, translation, and summarization tasks show that LangCompress improves performance in the target language. The code and data are publicly available.</abstract>
<identifier type="citekey">nguyen-etal-2025-langcompress</identifier>
<location>
<url>https://aclanthology.org/2025.ijcnlp-long.112/</url>
</location>
<part>
<date>2025-12</date>
<extent unit="page">
<start>2068</start>
<end>2077</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T LangCompress: Language-Aware Compression of Large Language Models
%A Nguyen, Dieu-Hien
%A Le, Nguyen-Khang
%A Do, Truong Dinh
%A Nguyen, Le-Minh
%Y Inui, Kentaro
%Y Sakti, Sakriani
%Y Wang, Haofen
%Y Wong, Derek F.
%Y Bhattacharyya, Pushpak
%Y Banerjee, Biplab
%Y Ekbal, Asif
%Y Chakraborty, Tanmoy
%Y Singh, Dhirendra Pratap
%S Proceedings of the 14th International Joint Conference on Natural Language Processing and the 4th Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics
%D 2025
%8 December
%I The Asian Federation of Natural Language Processing and The Association for Computational Linguistics
%C Mumbai, India
%@ 979-8-89176-298-5
%F nguyen-etal-2025-langcompress
%X Large Language Models (LLMs) demonstrate strong multilingual capabilities but are costly to deploy due to their size and computational demands. To mitigate this, compression techniques such as pruning and quantization are widely used. However, these methods face two key limitations: (1) they assume access to high-quality instruction or calibration data, which is often unavailable for low-resource languages; and (2) they aim to preserve multilingual generality, making them inefficient for language-specific applications. We introduce LangCompress, a language-aware compression framework that enhances existing compression methods for targeted deployment. LangCompress is method-agnostic and improves state-of-the-art pruning and quantization approaches. It features two core components: an iterative self-supervised pipeline for generating instruction data in the target language, and a vocabulary simplification strategy that reduces the LM head to focus on key tokens. Experiments on perplexity, translation, and summarization tasks show that LangCompress improves performance in the target language. The code and data are publicly available.
%U https://aclanthology.org/2025.ijcnlp-long.112/
%P 2068-2077
Markdown (Informal)
[LangCompress: Language-Aware Compression of Large Language Models](https://aclanthology.org/2025.ijcnlp-long.112/) (Nguyen et al., IJCNLP-AACL 2025)
ACL
Dieu-Hien Nguyen, Nguyen-Khang Le, Truong Dinh Do, and Le-Minh Nguyen. 2025. LangCompress: Language-Aware Compression of Large Language Models. In Proceedings of the 14th International Joint Conference on Natural Language Processing and the 4th Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics, pages 2068–2077, Mumbai, India. The Asian Federation of Natural Language Processing and The Association for Computational Linguistics.
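
Below is a minimal, hypothetical sketch of the vocabulary-simplification idea the abstract mentions (reducing the LM head to the tokens that matter for one target language). It is not the paper's implementation: the model name, placeholder corpus, and top-k budget are illustrative assumptions, and it assumes a HuggingFace-style causal LM.

```python
# Hypothetical sketch of "vocabulary simplification": keep only the LM-head rows
# for tokens frequent in a target-language corpus. Illustrative only; not the
# method or code released with the LangCompress paper.
from collections import Counter

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "gpt2"  # assumption: any causal LM with an accessible output embedding
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# 1. Count token usage over a small target-language corpus (placeholder text).
corpus = ["ví dụ văn bản trong ngôn ngữ đích", "một câu khác để đếm tần suất token"]
counts = Counter(tok for text in corpus for tok in tokenizer(text)["input_ids"])

# 2. Keep the most frequent tokens plus the tokenizer's special tokens.
top_k = 20_000  # illustrative budget, not a value taken from the paper
keep = sorted({*tokenizer.all_special_ids,
               *[tok for tok, _ in counts.most_common(top_k)]})
keep_idx = torch.tensor(keep)

# 3. Slice the LM-head weight matrix down to the kept vocabulary rows.
old_head = model.get_output_embeddings()
new_head = torch.nn.Linear(old_head.in_features, len(keep), bias=False)
with torch.no_grad():
    new_head.weight.copy_(old_head.weight[keep_idx])
model.set_output_embeddings(new_head)

# Output logits now index the reduced vocabulary, so decoding must map a
# predicted position i back to the original token id keep[i].
```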