@inproceedings{alkathiri-etal-2021-decentralized,
title = "Decentralized {W}ord2{V}ec Using Gossip Learning",
author = "Alkathiri, Abdul Aziz and
Giaretta, Lodovico and
Girdzijauskas, Sarunas and
Sahlgren, Magnus",
editor = "Dobnik, Simon and
{\O}vrelid, Lilja",
booktitle = "Proceedings of the 23rd Nordic Conference on Computational Linguistics (NoDaLiDa)",
month = may # " 31--2 " # jun,
year = "2021",
address = "Reykjavik, Iceland (Online)",
    publisher = "Link{\"o}ping University Electronic Press, Sweden",
url = "https://aclanthology.org/2021.nodalida-main.40",
pages = "373--377",
abstract = "Advanced NLP models require huge amounts of data from various domains to produce high-quality representations. It is useful then for a few large public and private organizations to join their corpora during training. However, factors such as legislation and user emphasis on data privacy may prevent centralized orchestration and data sharing among these organizations. Therefore, for this specific scenario, we investigate how gossip learning, a massively-parallel, data-private, decentralized protocol, compares to a shared-dataset solution. We find that the application of Word2Vec in a gossip learning framework is viable. Without any tuning, the results are comparable to a traditional centralized setting, with a loss of quality as low as 4.3{\%}. Furthermore, the results are up to 54.8{\%} better than independent local training.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="alkathiri-etal-2021-decentralized">
<titleInfo>
<title>Decentralized Word2Vec Using Gossip Learning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Abdul</namePart>
<namePart type="given">Aziz</namePart>
<namePart type="family">Alkathiri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lodovico</namePart>
<namePart type="family">Giaretta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sarunas</namePart>
<namePart type="family">Girdzijauskas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Magnus</namePart>
<namePart type="family">Sahlgren</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>May 31 – June 2, 2021</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 23rd Nordic Conference on Computational Linguistics (NoDaLiDa)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Simon</namePart>
<namePart type="family">Dobnik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lilja</namePart>
<namePart type="family">Øvrelid</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Linköping University Electronic Press, Sweden</publisher>
<place>
<placeTerm type="text">Reykjavik, Iceland (Online)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Advanced NLP models require huge amounts of data from various domains to produce high-quality representations. It is useful then for a few large public and private organizations to join their corpora during training. However, factors such as legislation and user emphasis on data privacy may prevent centralized orchestration and data sharing among these organizations. Therefore, for this specific scenario, we investigate how gossip learning, a massively-parallel, data-private, decentralized protocol, compares to a shared-dataset solution. We find that the application of Word2Vec in a gossip learning framework is viable. Without any tuning, the results are comparable to a traditional centralized setting, with a loss of quality as low as 4.3%. Furthermore, the results are up to 54.8% better than independent local training.</abstract>
<identifier type="citekey">alkathiri-etal-2021-decentralized</identifier>
<location>
<url>https://aclanthology.org/2021.nodalida-main.40</url>
</location>
<part>
<date>May 31 – June 2, 2021</date>
<extent unit="page">
<start>373</start>
<end>377</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Decentralized Word2Vec Using Gossip Learning
%A Alkathiri, Abdul Aziz
%A Giaretta, Lodovico
%A Girdzijauskas, Sarunas
%A Sahlgren, Magnus
%Y Dobnik, Simon
%Y Øvrelid, Lilja
%S Proceedings of the 23rd Nordic Conference on Computational Linguistics (NoDaLiDa)
%D 2021
%8 May 31 – June 2
%I Linköping University Electronic Press, Sweden
%C Reykjavik, Iceland (Online)
%F alkathiri-etal-2021-decentralized
%X Advanced NLP models require huge amounts of data from various domains to produce high-quality representations. It is useful then for a few large public and private organizations to join their corpora during training. However, factors such as legislation and user emphasis on data privacy may prevent centralized orchestration and data sharing among these organizations. Therefore, for this specific scenario, we investigate how gossip learning, a massively-parallel, data-private, decentralized protocol, compares to a shared-dataset solution. We find that the application of Word2Vec in a gossip learning framework is viable. Without any tuning, the results are comparable to a traditional centralized setting, with a loss of quality as low as 4.3%. Furthermore, the results are up to 54.8% better than independent local training.
%U https://aclanthology.org/2021.nodalida-main.40
%P 373-377
Markdown (Informal)
[Decentralized Word2Vec Using Gossip Learning](https://aclanthology.org/2021.nodalida-main.40) (Alkathiri et al., NoDaLiDa 2021)
ACL
Abdul Aziz Alkathiri, Lodovico Giaretta, Sarunas Girdzijauskas, and Magnus Sahlgren. 2021. Decentralized Word2Vec Using Gossip Learning. In Proceedings of the 23rd Nordic Conference on Computational Linguistics (NoDaLiDa), pages 373–377, Reykjavik, Iceland (Online). Linköping University Electronic Press, Sweden.
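
The abstract describes training Word2Vec under gossip learning: each participant keeps its corpus private, trains a local model copy, and periodically exchanges model parameters with randomly chosen peers. The toy sketch below illustrates that loop only; it is not the authors' implementation. The local Word2Vec SGD step is stubbed out with a simple squared-error update on synthetic node-local data, and the merge rule (parameter averaging) and uniform random peer selection are assumptions for illustration, not details confirmed by the paper.

```python
# Toy sketch of a gossip-learning loop for embedding models (NOT the paper's code).
# Each node holds a private model copy, trains on its own data, and periodically
# sends its parameters to a random peer, which averages them into its own model.
import numpy as np

rng = np.random.default_rng(0)
N_NODES, VOCAB, DIM, ROUNDS = 8, 100, 16, 50

# Stand-in for each node's private corpus: node-local "target" embeddings.
# In the paper's setting this would be real text driving Word2Vec SGD updates.
targets = [rng.normal(size=(VOCAB, DIM)) for _ in range(N_NODES)]
models = [rng.normal(size=(VOCAB, DIM)) for _ in range(N_NODES)]

def local_step(model, target, lr=0.1):
    """Placeholder for one local Word2Vec training step: plain gradient
    descent on a squared-error objective against the node's private data."""
    return model - lr * (model - target)

def merge(own, received):
    """Assumed gossip merge rule: average own parameters with the received model."""
    return 0.5 * (own + received)

for _ in range(ROUNDS):
    for i in range(N_NODES):
        models[i] = local_step(models[i], targets[i])
        # Gossip: push the updated model to one uniformly random peer.
        peer = rng.choice([j for j in range(N_NODES) if j != i])
        models[peer] = merge(models[peer], models[i])

# Models drift toward a consensus over all nodes' data without any node
# ever sharing its raw corpus, which is the data-privacy property the
# abstract highlights.
consensus = np.mean(targets, axis=0)
print("mean distance to consensus:",
      np.mean([np.linalg.norm(m - consensus) for m in models]))
```

Under these assumptions, repeated train-then-average exchanges spread each node's local signal through the network, which is why the abstract can compare the result against a single model trained on a shared, centralized dataset.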