@inproceedings{gurgurov-etal-2025-multilingual-encoder,
title = "On Multilingual Encoder Language Model Compression for Low-Resource Languages",
author = "Gurgurov, Daniil and
Gregor, Michal and
Genabith, Josef Van and
Ostermann, Simon",
editor = "T.y.s.s, Santosh and
Shimizu, Shuichiro and
Gong, Yifan",
booktitle = "The 14th International Joint Conference on Natural Language Processing and The 4th Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics",
month = dec,
year = "2025",
address = "Mumbai, India",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.ijcnlp-srw.5/",
pages = "47--58",
ISBN = "979-8-89176-304-3",
abstract = "In this paper, we combine two-step knowledge distillation, structured pruning, truncation, and vocabulary trimming for extremely compressing multilingual encoder-only language models for low-resource languages. Our novel approach systematically combines existing techniques and takes them to the extreme, reducing layer depth, feed-forward hidden size, and intermediate layer embedding size to create significantly smaller monolingual models while retaining essential language-specific knowledge. We achieve compression rates of up to 92{\%} while maintaining competitive performance, with average drops of 2{--}10{\%} for moderate compression and 8{--}13{\%} at maximum compression in four downstream tasks, including sentiment analysis, topic classification, named entity recognition, and part-of-speech tagging, across three low-resource languages. Notably, the performance degradation correlates with the amount of language-specific data in the teacher model, with larger datasets resulting in smaller performance losses. Additionally, we conduct ablation studies to identify the best practices for multilingual model compression using these techniques."
}
Daniil Gurgurov, Michal Gregor, Josef Van Genabith, and Simon Ostermann. 2025. On Multilingual Encoder Language Model Compression for Low-Resource Languages. In The 14th International Joint Conference on Natural Language Processing and The 4th Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics, pages 47–58, Mumbai, India. Association for Computational Linguistics.
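
For orientation, below is a minimal Python sketch of two of the techniques the abstract names, layer truncation and vocabulary trimming, applied to a multilingual encoder via Hugging Face transformers. This is not the authors' implementation: the model name xlm-roberta-base, the choice of 4 layers, and the toy corpus are placeholder assumptions, and the paper's two-step distillation and structured pruning of feed-forward and embedding sizes are omitted.

import torch
from transformers import AutoModel, AutoTokenizer

model = AutoModel.from_pretrained("xlm-roberta-base")
tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-base")

# Truncation: keep only the first 4 of the 12 transformer layers.
model.encoder.layer = torch.nn.ModuleList(model.encoder.layer[:4])
model.config.num_hidden_layers = 4

# Vocabulary trimming: collect the token ids that actually occur in a
# (placeholder) monolingual corpus, plus the special tokens.
corpus = ["A few sentences in the target low-resource language."]  # assumption
keep = set(tokenizer.all_special_ids)
for text in corpus:
    keep.update(tokenizer(text)["input_ids"])
keep = sorted(keep)

# Slice the embedding matrix down to the kept rows. A full implementation
# would also rebuild the tokenizer so its ids map onto the new rows; that
# remapping is omitted here.
old_embeddings = model.embeddings.word_embeddings.weight.data
new_embeddings = torch.nn.Embedding(len(keep), old_embeddings.size(1))
new_embeddings.weight.data = old_embeddings[keep].clone()
model.embeddings.word_embeddings = new_embeddings
model.config.vocab_size = len(keep)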