@inproceedings{huang-etal-2019-hubless,
title = "Hubless Nearest Neighbor Search for Bilingual Lexicon Induction",
author = "Huang, Jiaji and
Qiu, Qiang and
Church, Kenneth",
editor = "Korhonen, Anna and
Traum, David and
M{\`a}rquez, Llu{\'i}s",
booktitle = "Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics",
month = jul,
year = "2019",
address = "Florence, Italy",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/P19-1399/",
doi = "10.18653/v1/P19-1399",
pages = "4072--4080",
abstract = "Bilingual Lexicon Induction (BLI) is the task of translating words from corpora in two languages. Recent advances in BLI work by aligning the two word embedding spaces. Following that, a key step is to retrieve the nearest neighbor (NN) in the target space given the source word. However, a phenomenon called hubness often degrades the accuracy of NN. Hubness appears as some data points, called hubs, being extra-ordinarily close to many of the other data points. Reducing hubness is necessary for retrieval tasks. One successful example is Inverted SoFtmax (ISF), recently proposed to improve NN. This work proposes a new method, Hubless Nearest Neighbor (HNN), to mitigate hubness. HNN differs from NN by imposing an additional equal preference assumption. Moreover, the HNN formulation explains why ISF works as well as it does. Empirical results demonstrate that HNN outperforms NN, ISF and other state-of-the-art. For reproducibility and follow-ups, we have published all code."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="huang-etal-2019-hubless">
<titleInfo>
<title>Hubless Nearest Neighbor Search for Bilingual Lexicon Induction</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jiaji</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Qiang</namePart>
<namePart type="family">Qiu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kenneth</namePart>
<namePart type="family">Church</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Korhonen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Traum</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lluís</namePart>
<namePart type="family">Màrquez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Florence, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Bilingual Lexicon Induction (BLI) is the task of translating words from corpora in two languages. Recent advances in BLI work by aligning the two word embedding spaces. Following that, a key step is to retrieve the nearest neighbor (NN) in the target space given the source word. However, a phenomenon called hubness often degrades the accuracy of NN. Hubness appears as some data points, called hubs, being extra-ordinarily close to many of the other data points. Reducing hubness is necessary for retrieval tasks. One successful example is Inverted SoFtmax (ISF), recently proposed to improve NN. This work proposes a new method, Hubless Nearest Neighbor (HNN), to mitigate hubness. HNN differs from NN by imposing an additional equal preference assumption. Moreover, the HNN formulation explains why ISF works as well as it does. Empirical results demonstrate that HNN outperforms NN, ISF and other state-of-the-art. For reproducibility and follow-ups, we have published all code.</abstract>
<identifier type="citekey">huang-etal-2019-hubless</identifier>
<identifier type="doi">10.18653/v1/P19-1399</identifier>
<location>
<url>https://aclanthology.org/P19-1399/</url>
</location>
<part>
<date>2019-07</date>
<extent unit="page">
<start>4072</start>
<end>4080</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Hubless Nearest Neighbor Search for Bilingual Lexicon Induction
%A Huang, Jiaji
%A Qiu, Qiang
%A Church, Kenneth
%Y Korhonen, Anna
%Y Traum, David
%Y Màrquez, Lluís
%S Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics
%D 2019
%8 July
%I Association for Computational Linguistics
%C Florence, Italy
%F huang-etal-2019-hubless
%X Bilingual Lexicon Induction (BLI) is the task of translating words from corpora in two languages. Recent advances in BLI work by aligning the two word embedding spaces. Following that, a key step is to retrieve the nearest neighbor (NN) in the target space given the source word. However, a phenomenon called hubness often degrades the accuracy of NN. Hubness appears as some data points, called hubs, being extra-ordinarily close to many of the other data points. Reducing hubness is necessary for retrieval tasks. One successful example is Inverted SoFtmax (ISF), recently proposed to improve NN. This work proposes a new method, Hubless Nearest Neighbor (HNN), to mitigate hubness. HNN differs from NN by imposing an additional equal preference assumption. Moreover, the HNN formulation explains why ISF works as well as it does. Empirical results demonstrate that HNN outperforms NN, ISF and other state-of-the-art. For reproducibility and follow-ups, we have published all code.
%R 10.18653/v1/P19-1399
%U https://aclanthology.org/P19-1399/
%U https://doi.org/10.18653/v1/P19-1399
%P 4072-4080
Markdown (Informal)
[Hubless Nearest Neighbor Search for Bilingual Lexicon Induction](https://aclanthology.org/P19-1399/) (Huang et al., ACL 2019)
ACL