@inproceedings{fernandez-etal-2017-vecshare,
title = "{V}ec{S}hare: A Framework for Sharing Word Representation Vectors",
author = "Fernandez, Jared and
Yu, Zhaocheng and
Downey, Doug",
editor = "Palmer, Martha and
Hwa, Rebecca and
Riedel, Sebastian",
booktitle = "Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing",
month = sep,
year = "2017",
address = "Copenhagen, Denmark",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/D17-1032",
doi = "10.18653/v1/D17-1032",
pages = "316--320",
abstract = "Many Natural Language Processing (NLP) models rely on distributed vector representations of words. Because the process of training word vectors can require large amounts of data and computation, NLP researchers and practitioners often utilize pre-trained embeddings downloaded from the Web. However, finding the best embeddings for a given task is difficult, and can be computationally prohibitive. We present a framework, called VecShare, that makes it easy to share and retrieve word embeddings on the Web. The framework leverages a public data-sharing infrastructure to host embedding sets, and provides automated mechanisms for retrieving the embeddings most similar to a given corpus. We perform an experimental evaluation of VecShare{'}s similarity strategies, and show that they are effective at efficiently retrieving embeddings that boost accuracy in a document classification task. Finally, we provide an open-source Python library for using the VecShare framework.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="fernandez-etal-2017-vecshare">
<titleInfo>
<title>VecShare: A Framework for Sharing Word Representation Vectors</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jared</namePart>
<namePart type="family">Fernandez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhaocheng</namePart>
<namePart type="family">Yu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Doug</namePart>
<namePart type="family">Downey</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Martha</namePart>
<namePart type="family">Palmer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rebecca</namePart>
<namePart type="family">Hwa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastian</namePart>
<namePart type="family">Riedel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Copenhagen, Denmark</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Many Natural Language Processing (NLP) models rely on distributed vector representations of words. Because the process of training word vectors can require large amounts of data and computation, NLP researchers and practitioners often utilize pre-trained embeddings downloaded from the Web. However, finding the best embeddings for a given task is difficult, and can be computationally prohibitive. We present a framework, called VecShare, that makes it easy to share and retrieve word embeddings on the Web. The framework leverages a public data-sharing infrastructure to host embedding sets, and provides automated mechanisms for retrieving the embeddings most similar to a given corpus. We perform an experimental evaluation of VecShare’s similarity strategies, and show that they are effective at efficiently retrieving embeddings that boost accuracy in a document classification task. Finally, we provide an open-source Python library for using the VecShare framework.</abstract>
<identifier type="citekey">fernandez-etal-2017-vecshare</identifier>
<identifier type="doi">10.18653/v1/D17-1032</identifier>
<location>
<url>https://aclanthology.org/D17-1032</url>
</location>
<part>
<date>2017-09</date>
<extent unit="page">
<start>316</start>
<end>320</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T VecShare: A Framework for Sharing Word Representation Vectors
%A Fernandez, Jared
%A Yu, Zhaocheng
%A Downey, Doug
%Y Palmer, Martha
%Y Hwa, Rebecca
%Y Riedel, Sebastian
%S Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing
%D 2017
%8 September
%I Association for Computational Linguistics
%C Copenhagen, Denmark
%F fernandez-etal-2017-vecshare
%X Many Natural Language Processing (NLP) models rely on distributed vector representations of words. Because the process of training word vectors can require large amounts of data and computation, NLP researchers and practitioners often utilize pre-trained embeddings downloaded from the Web. However, finding the best embeddings for a given task is difficult, and can be computationally prohibitive. We present a framework, called VecShare, that makes it easy to share and retrieve word embeddings on the Web. The framework leverages a public data-sharing infrastructure to host embedding sets, and provides automated mechanisms for retrieving the embeddings most similar to a given corpus. We perform an experimental evaluation of VecShare’s similarity strategies, and show that they are effective at efficiently retrieving embeddings that boost accuracy in a document classification task. Finally, we provide an open-source Python library for using the VecShare framework.
%R 10.18653/v1/D17-1032
%U https://aclanthology.org/D17-1032
%U https://doi.org/10.18653/v1/D17-1032
%P 316-320
Markdown (Informal)
[VecShare: A Framework for Sharing Word Representation Vectors](https://aclanthology.org/D17-1032) (Fernandez et al., EMNLP 2017)
ACL