@inproceedings{melas-kyriazi-wang-2022-intrinsic,
title = "Intrinsic Gradient Compression for Scalable and Efficient Federated Learning",
author = "Melas-Kyriazi, Luke and
Wang, Franklyn",
editor = "Lin, Bill Yuchen and
He, Chaoyang and
Xie, Chulin and
Mireshghallah, Fatemehsadat and
Mehrabi, Ninareh and
Li, Tian and
Soltanolkotabi, Mahdi and
Ren, Xiang",
booktitle = "Proceedings of the First Workshop on Federated Learning for Natural Language Processing (FL4NLP 2022)",
month = may,
year = "2022",
address = "Dublin, Ireland",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.fl4nlp-1.4",
doi = "10.18653/v1/2022.fl4nlp-1.4",
pages = "27--41",
abstract = "Federated learning is a rapidly growing area of research, holding the promise of privacy-preserving distributed training on edge devices. The largest barrier to wider adoption of federated learning is the communication cost of model updates, which is accentuated by the fact that many edge devices are bandwidth-constrained. At the same time, within the machine learning theory community, a separate line of research has emerged around optimizing networks within a subspace of the full space of all parameters. The dimension of the smallest subspace for which these methods still yield strong results is called the intrinsic dimension. In this work, we prove a general correspondence between the notions of intrinsic dimension and gradient compressibility, and we show that a family of low-bandwidth federated learning algorithms, which we call intrinsic gradient compression algorithms, naturally emerges from this correspondence. Finally, we conduct large-scale NLP experiments using transformer models with over 100M parameters (GPT-2 and BERT), and show that our method significantly outperforms the state-of-the-art in gradient compression.",
}
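As context for the abstract above: the "intrinsic dimension" line of work it references optimizes a D-parameter model inside a fixed (typically random) m-dimensional subspace. A minimal sketch of that general setup, using illustrative notation (\theta_0, A, d, m, D are not symbols taken from the paper) rather than the paper's own algorithms:

\[
\theta = \theta_0 + A d, \qquad d \in \mathbb{R}^{m}, \; A \in \mathbb{R}^{D \times m}, \; m \ll D,
\qquad
\nabla_d \mathcal{L}(\theta) = A^{\top} \nabla_{\theta} \mathcal{L}(\theta),
\]

so an update can be communicated as the m coordinates of \nabla_d \mathcal{L} (or of d) instead of a full D-dimensional gradient. This is the sense in which a small intrinsic dimension makes gradients compressible; it is only a sketch of the background idea, not the specific intrinsic gradient compression algorithms introduced in the paper.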
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="melas-kyriazi-wang-2022-intrinsic">
<titleInfo>
<title>Intrinsic Gradient Compression for Scalable and Efficient Federated Learning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Luke</namePart>
<namePart type="family">Melas-Kyriazi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Franklyn</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Federated Learning for Natural Language Processing (FL4NLP 2022)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bill</namePart>
<namePart type="given">Yuchen</namePart>
<namePart type="family">Lin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chaoyang</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chulin</namePart>
<namePart type="family">Xie</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fatemehsadat</namePart>
<namePart type="family">Mireshghallah</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ninareh</namePart>
<namePart type="family">Mehrabi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tian</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mahdi</namePart>
<namePart type="family">Soltanolkotabi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiang</namePart>
<namePart type="family">Ren</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dublin, Ireland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Federated learning is a rapidly growing area of research, holding the promise of privacy-preserving distributed training on edge devices. The largest barrier to wider adoption of federated learning is the communication cost of model updates, which is accentuated by the fact that many edge devices are bandwidth-constrained. At the same time, within the machine learning theory community, a separate line of research has emerged around optimizing networks within a subspace of the full space of all parameters. The dimension of the smallest subspace for which these methods still yield strong results is called the intrinsic dimension. In this work, we prove a general correspondence between the notions of intrinsic dimension and gradient compressibility, and we show that a family of low-bandwidth federated learning algorithms, which we call intrinsic gradient compression algorithms, naturally emerges from this correspondence. Finally, we conduct large-scale NLP experiments using transformer models with over 100M parameters (GPT-2 and BERT), and show that our method significantly outperforms the state-of-the-art in gradient compression.</abstract>
<identifier type="citekey">melas-kyriazi-wang-2022-intrinsic</identifier>
<identifier type="doi">10.18653/v1/2022.fl4nlp-1.4</identifier>
<location>
<url>https://aclanthology.org/2022.fl4nlp-1.4</url>
</location>
<part>
<date>2022-05</date>
<extent unit="page">
<start>27</start>
<end>41</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Intrinsic Gradient Compression for Scalable and Efficient Federated Learning
%A Melas-Kyriazi, Luke
%A Wang, Franklyn
%Y Lin, Bill Yuchen
%Y He, Chaoyang
%Y Xie, Chulin
%Y Mireshghallah, Fatemehsadat
%Y Mehrabi, Ninareh
%Y Li, Tian
%Y Soltanolkotabi, Mahdi
%Y Ren, Xiang
%S Proceedings of the First Workshop on Federated Learning for Natural Language Processing (FL4NLP 2022)
%D 2022
%8 May
%I Association for Computational Linguistics
%C Dublin, Ireland
%F melas-kyriazi-wang-2022-intrinsic
%X Federated learning is a rapidly growing area of research, holding the promise of privacy-preserving distributed training on edge devices. The largest barrier to wider adoption of federated learning is the communication cost of model updates, which is accentuated by the fact that many edge devices are bandwidth-constrained. At the same time, within the machine learning theory community, a separate line of research has emerged around optimizing networks within a subspace of the full space of all parameters. The dimension of the smallest subspace for which these methods still yield strong results is called the intrinsic dimension. In this work, we prove a general correspondence between the notions of intrinsic dimension and gradient compressibility, and we show that a family of low-bandwidth federated learning algorithms, which we call intrinsic gradient compression algorithms, naturally emerges from this correspondence. Finally, we conduct large-scale NLP experiments using transformer models with over 100M parameters (GPT-2 and BERT), and show that our method significantly outperforms the state-of-the-art in gradient compression.
%R 10.18653/v1/2022.fl4nlp-1.4
%U https://aclanthology.org/2022.fl4nlp-1.4
%U https://doi.org/10.18653/v1/2022.fl4nlp-1.4
%P 27-41
Markdown (Informal)
[Intrinsic Gradient Compression for Scalable and Efficient Federated Learning](https://aclanthology.org/2022.fl4nlp-1.4) (Melas-Kyriazi & Wang, FL4NLP 2022)
ACL
Luke Melas-Kyriazi and Franklyn Wang. 2022. Intrinsic Gradient Compression for Scalable and Efficient Federated Learning. In Proceedings of the First Workshop on Federated Learning for Natural Language Processing (FL4NLP 2022), pages 27–41, Dublin, Ireland. Association for Computational Linguistics.