% ACL Anthology record W19-0420 (see the url and doi fields below).
@inproceedings{koopman-etal-2019-fast,
  title     = {Fast and Discriminative Semantic Embedding},
  author    = {Koopman, Rob and Wang, Shenghui and Englebienne, Gwenn},
  editor    = {Dobnik, Simon and Chatzikyriakidis, Stergios and Demberg, Vera},
  booktitle = {Proceedings of the 13th International Conference on Computational Semantics - Long Papers},
  month     = may,
  year      = {2019},
  address   = {Gothenburg, Sweden},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/W19-0420},
  doi       = {10.18653/v1/W19-0420},
  pages     = {235--246},
  abstract  = {The embedding of words and documents in compact, semantically meaningful vector spaces is a crucial part of modern information systems. Deep Learning models are powerful but their hyperparameter selection is often complex and they are expensive to train, and while pre-trained models are available, embeddings trained on general corpora are not necessarily well-suited to domain specific tasks. We propose a novel embedding method which extends random projection by weighting and projecting raw term embeddings orthogonally to an average language vector, thus improving the discriminating power of resulting term embeddings, and build more meaningful document embeddings by assigning appropriate weights to individual terms. We describe how updating the term embeddings online as we process the training data results in an extremely efficient method, in terms of both computational and memory requirements. Our experiments show highly competitive results with various state-of-the-art embedding methods on different tasks, including the standard STS benchmark and a subject prediction task, at a fraction of the computational cost.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record, citekey koopman-etal-2019-fast. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="koopman-etal-2019-fast">
    <!-- Paper title -->
    <titleInfo>
      <title>Fast and Discriminative Semantic Embedding</title>
    </titleInfo>
    <!-- Authors, in listed order -->
    <name type="personal">
      <namePart type="given">Rob</namePart>
      <namePart type="family">Koopman</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Shenghui</namePart>
      <namePart type="family">Wang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Gwenn</namePart>
      <namePart type="family">Englebienne</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2019-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors and publisher -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 13th International Conference on Computational Semantics - Long Papers</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Simon</namePart>
        <namePart type="family">Dobnik</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Stergios</namePart>
        <namePart type="family">Chatzikyriakidis</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Vera</namePart>
        <namePart type="family">Demberg</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Gothenburg, Sweden</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>The embedding of words and documents in compact, semantically meaningful vector spaces is a crucial part of modern information systems. Deep Learning models are powerful but their hyperparameter selection is often complex and they are expensive to train, and while pre-trained models are available, embeddings trained on general corpora are not necessarily well-suited to domain specific tasks. We propose a novel embedding method which extends random projection by weighting and projecting raw term embeddings orthogonally to an average language vector, thus improving the discriminating power of resulting term embeddings, and build more meaningful document embeddings by assigning appropriate weights to individual terms. We describe how updating the term embeddings online as we process the training data results in an extremely efficient method, in terms of both computational and memory requirements. Our experiments show highly competitive results with various state-of-the-art embedding methods on different tasks, including the standard STS benchmark and a subject prediction task, at a fraction of the computational cost.</abstract>
    <!-- Identifiers and landing page -->
    <identifier type="citekey">koopman-etal-2019-fast</identifier>
    <identifier type="doi">10.18653/v1/W19-0420</identifier>
    <location>
      <url>https://aclanthology.org/W19-0420</url>
    </location>
    <!-- Date and page range within the host volume -->
    <part>
      <date>2019-05</date>
      <extent unit="page">
        <start>235</start>
        <end>246</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Fast and Discriminative Semantic Embedding
%A Koopman, Rob
%A Wang, Shenghui
%A Englebienne, Gwenn
%Y Dobnik, Simon
%Y Chatzikyriakidis, Stergios
%Y Demberg, Vera
%S Proceedings of the 13th International Conference on Computational Semantics - Long Papers
%D 2019
%8 May
%I Association for Computational Linguistics
%C Gothenburg, Sweden
%F koopman-etal-2019-fast
%X The embedding of words and documents in compact, semantically meaningful vector spaces is a crucial part of modern information systems. Deep Learning models are powerful but their hyperparameter selection is often complex and they are expensive to train, and while pre-trained models are available, embeddings trained on general corpora are not necessarily well-suited to domain specific tasks. We propose a novel embedding method which extends random projection by weighting and projecting raw term embeddings orthogonally to an average language vector, thus improving the discriminating power of resulting term embeddings, and build more meaningful document embeddings by assigning appropriate weights to individual terms. We describe how updating the term embeddings online as we process the training data results in an extremely efficient method, in terms of both computational and memory requirements. Our experiments show highly competitive results with various state-of-the-art embedding methods on different tasks, including the standard STS benchmark and a subject prediction task, at a fraction of the computational cost.
%R 10.18653/v1/W19-0420
%U https://aclanthology.org/W19-0420
%U https://doi.org/10.18653/v1/W19-0420
%P 235-246
Markdown (Informal)
[Fast and Discriminative Semantic Embedding](https://aclanthology.org/W19-0420) (Koopman et al., IWCS 2019)
ACL
- Rob Koopman, Shenghui Wang, and Gwenn Englebienne. 2019. Fast and Discriminative Semantic Embedding. In Proceedings of the 13th International Conference on Computational Semantics - Long Papers, pages 235–246, Gothenburg, Sweden. Association for Computational Linguistics.