@inproceedings{pierrejean-tanguy-2018-predicting,
title = "Predicting Word Embeddings Variability",
author = "Pierrejean, B{\'e}n{\'e}dicte and
Tanguy, Ludovic",
editor = "Nissim, Malvina and
Berant, Jonathan and
Lenci, Alessandro",
booktitle = "Proceedings of the Seventh Joint Conference on Lexical and Computational Semantics",
month = jun,
year = "2018",
address = "New Orleans, Louisiana",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/S18-2019",
doi = "10.18653/v1/S18-2019",
pages = "154--159",
abstract = "Neural word embeddings models (such as those built with word2vec) are known to have stability problems: when retraining a model with the exact same hyperparameters, words neighborhoods may change. We propose a method to estimate such variation, based on the overlap of neighbors of a given word in two models trained with identical hyperparameters. We show that this inherent variation is not negligible, and that it does not affect every word in the same way. We examine the influence of several features that are intrinsic to a word, corpus or embedding model and provide a methodology that can predict the variability (and as such, reliability) of a word representation in a semantic vector space.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="pierrejean-tanguy-2018-predicting">
<titleInfo>
<title>Predicting Word Embeddings Variability</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bénédicte</namePart>
<namePart type="family">Pierrejean</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ludovic</namePart>
<namePart type="family">Tanguy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Seventh Joint Conference on Lexical and Computational Semantics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Malvina</namePart>
<namePart type="family">Nissim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jonathan</namePart>
<namePart type="family">Berant</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">New Orleans, Louisiana</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Neural word embeddings models (such as those built with word2vec) are known to have stability problems: when retraining a model with the exact same hyperparameters, words neighborhoods may change. We propose a method to estimate such variation, based on the overlap of neighbors of a given word in two models trained with identical hyperparameters. We show that this inherent variation is not negligible, and that it does not affect every word in the same way. We examine the influence of several features that are intrinsic to a word, corpus or embedding model and provide a methodology that can predict the variability (and as such, reliability) of a word representation in a semantic vector space.</abstract>
<identifier type="citekey">pierrejean-tanguy-2018-predicting</identifier>
<identifier type="doi">10.18653/v1/S18-2019</identifier>
<location>
<url>https://aclanthology.org/S18-2019</url>
</location>
<part>
<date>2018-06</date>
<extent unit="page">
<start>154</start>
<end>159</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Predicting Word Embeddings Variability
%A Pierrejean, Bénédicte
%A Tanguy, Ludovic
%Y Nissim, Malvina
%Y Berant, Jonathan
%Y Lenci, Alessandro
%S Proceedings of the Seventh Joint Conference on Lexical and Computational Semantics
%D 2018
%8 June
%I Association for Computational Linguistics
%C New Orleans, Louisiana
%F pierrejean-tanguy-2018-predicting
%X Neural word embeddings models (such as those built with word2vec) are known to have stability problems: when retraining a model with the exact same hyperparameters, words neighborhoods may change. We propose a method to estimate such variation, based on the overlap of neighbors of a given word in two models trained with identical hyperparameters. We show that this inherent variation is not negligible, and that it does not affect every word in the same way. We examine the influence of several features that are intrinsic to a word, corpus or embedding model and provide a methodology that can predict the variability (and as such, reliability) of a word representation in a semantic vector space.
%R 10.18653/v1/S18-2019
%U https://aclanthology.org/S18-2019
%U https://doi.org/10.18653/v1/S18-2019
%P 154-159
Markdown (Informal)
[Predicting Word Embeddings Variability](https://aclanthology.org/S18-2019) (Pierrejean & Tanguy, *SEM 2018)
ACL
- Bénédicte Pierrejean and Ludovic Tanguy. 2018. Predicting Word Embeddings Variability. In Proceedings of the Seventh Joint Conference on Lexical and Computational Semantics, pages 154–159, New Orleans, Louisiana. Association for Computational Linguistics.