% Conference paper (Chinese, with a bilingual title) hosted at aclanthology.org.
% All values are brace-delimited; `month` uses the predefined three-letter macro.
@inproceedings{wang-etal-2020-ji-yu,
    title     = {基于强负采样的词嵌入优化算法(Word Embedding Optimization Based on Hard Negative Sampling)},
    author    = {Wang, Yuchen and Lin, Miaozhe and Zhan, Jiefan},
    editor    = {Sun, Maosong and Li, Sujian and Zhang, Yue and Liu, Yang},
    booktitle = {Proceedings of the 19th Chinese National Conference on Computational Linguistics},
    month     = oct,
    year      = {2020},
    address   = {Haikou, China},
    publisher = {Chinese Information Processing Society of China},
    url       = {https://aclanthology.org/2020.ccl-1.20},
    pages     = {207--214},
    abstract  = {word2vec是自然语言处理领域重要的词嵌入算法之一,为了解决随机负采样作为优化目标可能出现的样本贡献消失问题,提出了可以应用在CBOW和Skip-gram框架上的以余弦距离为度量的强负采样方法:HNS-CBOW和HNS-SG。将原随机负采样过程拆解为两个步骤,首先,计算随机负样本与目标词的余弦距离,然后,再使用距离较近的强负样本更新参数。以英文维基百科数据作为实验语料,在公开的语义-语法数据集上对优化算法的效果进行了定量分析,实验表明,优化后的词嵌入质量显著优于原方法。同时,与GloVe等公开发布的预训练词向量相比,可以在更小的语料库上获得更高的准确性。},
    language  = {Chinese},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 description of the paper with citekey wang-etal-2020-ji-yu. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="wang-etal-2020-ji-yu">
    <!-- Paper title (Chinese original followed by the English rendering). -->
    <titleInfo>
      <title>基于强负采样的词嵌入优化算法(Word Embedding Optimization Based on Hard Negative Sampling)</title>
    </titleInfo>
    <!-- Authors, in order. -->
    <name type="personal">
      <namePart type="given">Yuchen</namePart>
      <namePart type="family">Wang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Miaozhe</namePart>
      <namePart type="family">Lin</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jiefan</namePart>
      <namePart type="family">Zhan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2020-10</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <language>
      <languageTerm type="text">Chinese</languageTerm>
      <languageTerm type="code" authority="iso639-2b">chi</languageTerm>
    </language>
    <!-- Host item: the proceedings volume, its editors and its publisher. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 19th Chinese National Conference on Computational Linguistics</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Maosong</namePart>
        <namePart type="family">Sun</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sujian</namePart>
        <namePart type="family">Li</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yue</namePart>
        <namePart type="family">Zhang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yang</namePart>
        <namePart type="family">Liu</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Chinese Information Processing Society of China</publisher>
        <place>
          <placeTerm type="text">Haikou, China</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>word2vec是自然语言处理领域重要的词嵌入算法之一,为了解决随机负采样作为优化目标可能出现的样本贡献消失问题,提出了可以应用在CBOW和Skip-gram框架上的以余弦距离为度量的强负采样方法:HNS-CBOW和HNS-SG。将原随机负采样过程拆解为两个步骤,首先,计算随机负样本与目标词的余弦距离,然后,再使用距离较近的强负样本更新参数。以英文维基百科数据作为实验语料,在公开的语义-语法数据集上对优化算法的效果进行了定量分析,实验表明,优化后的词嵌入质量显著优于原方法。同时,与GloVe等公开发布的预训练词向量相比,可以在更小的语料库上获得更高的准确性。</abstract>
    <identifier type="citekey">wang-etal-2020-ji-yu</identifier>
    <location>
      <url>https://aclanthology.org/2020.ccl-1.20</url>
    </location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2020-10</date>
      <extent unit="page">
        <start>207</start>
        <end>214</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T 基于强负采样的词嵌入优化算法(Word Embedding Optimization Based on Hard Negative Sampling)
%A Wang, Yuchen
%A Lin, Miaozhe
%A Zhan, Jiefan
%Y Sun, Maosong
%Y Li, Sujian
%Y Zhang, Yue
%Y Liu, Yang
%S Proceedings of the 19th Chinese National Conference on Computational Linguistics
%D 2020
%8 October
%I Chinese Information Processing Society of China
%C Haikou, China
%G Chinese
%F wang-etal-2020-ji-yu
%X word2vec是自然语言处理领域重要的词嵌入算法之一,为了解决随机负采样作为优化目标可能出现的样本贡献消失问题,提出了可以应用在CBOW和Skip-gram框架上的以余弦距离为度量的强负采样方法:HNS-CBOW和HNS-SG。将原随机负采样过程拆解为两个步骤,首先,计算随机负样本与目标词的余弦距离,然后,再使用距离较近的强负样本更新参数。以英文维基百科数据作为实验语料,在公开的语义-语法数据集上对优化算法的效果进行了定量分析,实验表明,优化后的词嵌入质量显著优于原方法。同时,与GloVe等公开发布的预训练词向量相比,可以在更小的语料库上获得更高的准确性。
%U https://aclanthology.org/2020.ccl-1.20
%P 207-214
Markdown (Informal)
[基于强负采样的词嵌入优化算法(Word Embedding Optimization Based on Hard Negative Sampling)](https://aclanthology.org/2020.ccl-1.20) (Wang et al., CCL 2020)
ACL