@inproceedings{gornerup-gillblad-2018-streaming,
title = "Streaming word similarity mining on the cheap",
author = {G{\"o}rnerup, Olof and
Gillblad, Daniel},
editor = "Riloff, Ellen and
Chiang, David and
Hockenmaier, Julia and
Tsujii, Jun{'}ichi",
booktitle = "Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing",
month = oct # "-" # nov,
year = "2018",
address = "Brussels, Belgium",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/D18-1172",
doi = "10.18653/v1/D18-1172",
pages = "1425--1434",
abstract = "Accurately and efficiently estimating word similarities from text is fundamental in natural language processing. In this paper, we propose a fast and lightweight method for estimating similarities from streams by explicitly counting second-order co-occurrences. The method rests on the observation that words that are highly correlated with respect to such counts are also highly similar with respect to first-order co-occurrences. Using buffers of co-occurred words per word to count second-order co-occurrences, we can then estimate similarities in a single pass over data without having to do prohibitively expensive similarity calculations. We demonstrate that this approach is scalable, converges rapidly, behaves robustly under parameter changes, and that it captures word similarities on par with those given by state-of-the-art word embeddings.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="gornerup-gillblad-2018-streaming">
<titleInfo>
<title>Streaming word similarity mining on the cheap</title>
</titleInfo>
<name type="personal">
<namePart type="given">Olof</namePart>
<namePart type="family">Görnerup</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Gillblad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-oct-nov</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ellen</namePart>
<namePart type="family">Riloff</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Chiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Julia</namePart>
<namePart type="family">Hockenmaier</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jun’ichi</namePart>
<namePart type="family">Tsujii</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Brussels, Belgium</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Accurately and efficiently estimating word similarities from text is fundamental in natural language processing. In this paper, we propose a fast and lightweight method for estimating similarities from streams by explicitly counting second-order co-occurrences. The method rests on the observation that words that are highly correlated with respect to such counts are also highly similar with respect to first-order co-occurrences. Using buffers of co-occurred words per word to count second-order co-occurrences, we can then estimate similarities in a single pass over data without having to do prohibitively expensive similarity calculations. We demonstrate that this approach is scalable, converges rapidly, behaves robustly under parameter changes, and that it captures word similarities on par with those given by state-of-the-art word embeddings.</abstract>
<identifier type="citekey">gornerup-gillblad-2018-streaming</identifier>
<identifier type="doi">10.18653/v1/D18-1172</identifier>
<location>
<url>https://aclanthology.org/D18-1172</url>
</location>
<part>
<date>2018-oct-nov</date>
<extent unit="page">
<start>1425</start>
<end>1434</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Streaming word similarity mining on the cheap
%A Görnerup, Olof
%A Gillblad, Daniel
%Y Riloff, Ellen
%Y Chiang, David
%Y Hockenmaier, Julia
%Y Tsujii, Jun’ichi
%S Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing
%D 2018
%8 oct nov
%I Association for Computational Linguistics
%C Brussels, Belgium
%F gornerup-gillblad-2018-streaming
%X Accurately and efficiently estimating word similarities from text is fundamental in natural language processing. In this paper, we propose a fast and lightweight method for estimating similarities from streams by explicitly counting second-order co-occurrences. The method rests on the observation that words that are highly correlated with respect to such counts are also highly similar with respect to first-order co-occurrences. Using buffers of co-occurred words per word to count second-order co-occurrences, we can then estimate similarities in a single pass over data without having to do prohibitively expensive similarity calculations. We demonstrate that this approach is scalable, converges rapidly, behaves robustly under parameter changes, and that it captures word similarities on par with those given by state-of-the-art word embeddings.
%R 10.18653/v1/D18-1172
%U https://aclanthology.org/D18-1172
%U https://doi.org/10.18653/v1/D18-1172
%P 1425-1434
Markdown (Informal)
[Streaming word similarity mining on the cheap](https://aclanthology.org/D18-1172) (Görnerup & Gillblad, EMNLP 2018)
ACL
- Olof Görnerup and Daniel Gillblad. 2018. Streaming word similarity mining on the cheap. In Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing, pages 1425–1434, Brussels, Belgium. Association for Computational Linguistics.