% Workshop paper (ACL Anthology ID D19-6107) published in the DeepLo 2019 proceedings.
% NOTE(review): `address` presumably holds the workshop venue as emitted by the
% exporter, not the publisher's city -- confirm before normalising.
@inproceedings{pan-etal-2019-cross,
    title     = {Cross-lingual Joint Entity and Word Embedding to Improve Entity Linking and Parallel Sentence Mining},
    author    = {Pan, Xiaoman and
                 Gowda, Thamme and
                 Ji, Heng and
                 May, Jonathan and
                 Miller, Scott},
    editor    = {Cherry, Colin and
                 Durrett, Greg and
                 Foster, George and
                 Haffari, Reza and
                 Khadivi, Shahram and
                 Peng, Nanyun and
                 Ren, Xiang and
                 Swayamdipta, Swabha},
    booktitle = {Proceedings of the 2nd Workshop on Deep Learning Approaches for Low-Resource {NLP} ({DeepLo} 2019)},
    month     = nov,
    year      = {2019},
    address   = {Hong Kong, China},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/D19-6107/},
    doi       = {10.18653/v1/D19-6107},
    pages     = {56--66},
    abstract  = {Entities, which refer to distinct objects in the real world, can be viewed as language universals and used as effective signals to generate less ambiguous semantic representations and align multiple languages. We propose a novel method, CLEW, to generate cross-lingual data that is a mix of entities and contextual words based on Wikipedia. We replace each anchor link in the source language with its corresponding entity title in the target language if it exists, or in the source language otherwise. A cross-lingual joint entity and word embedding learned from this kind of data not only can disambiguate linkable entities but can also effectively represent unlinkable entities. Because this multilingual common space directly relates the semantics of contextual words in the source language to that of entities in the target language, we leverage it for unsupervised cross-lingual entity linking. Experimental results show that CLEW significantly advances the state-of-the-art: up to 3.1{\%} absolute F-score gain for unsupervised cross-lingual entity linking. Moreover, it provides reliable alignment on both the word/entity level and the sentence level, and thus we use it to mine parallel sentences for all $\binom{302}{2}$ language pairs in Wikipedia.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="pan-etal-2019-cross">
    <titleInfo>
        <title>Cross-lingual Joint Entity and Word Embedding to Improve Entity Linking and Parallel Sentence Mining</title>
    </titleInfo>
    <name type="personal">
        <namePart type="given">Xiaoman</namePart>
        <namePart type="family">Pan</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Thamme</namePart>
        <namePart type="family">Gowda</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Heng</namePart>
        <namePart type="family">Ji</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Jonathan</namePart>
        <namePart type="family">May</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Scott</namePart>
        <namePart type="family">Miller</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <originInfo>
        <dateIssued>2019-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
        <titleInfo>
            <title>Proceedings of the 2nd Workshop on Deep Learning Approaches for Low-Resource NLP (DeepLo 2019)</title>
        </titleInfo>
        <name type="personal">
            <namePart type="given">Colin</namePart>
            <namePart type="family">Cherry</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Greg</namePart>
            <namePart type="family">Durrett</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">George</namePart>
            <namePart type="family">Foster</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Reza</namePart>
            <namePart type="family">Haffari</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Shahram</namePart>
            <namePart type="family">Khadivi</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Nanyun</namePart>
            <namePart type="family">Peng</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Xiang</namePart>
            <namePart type="family">Ren</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Swabha</namePart>
            <namePart type="family">Swayamdipta</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <originInfo>
            <publisher>Association for Computational Linguistics</publisher>
            <place>
                <placeTerm type="text">Hong Kong, China</placeTerm>
            </place>
        </originInfo>
        <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Entities, which refer to distinct objects in the real world, can be viewed as language universals and used as effective signals to generate less ambiguous semantic representations and align multiple languages. We propose a novel method, CLEW, to generate cross-lingual data that is a mix of entities and contextual words based on Wikipedia. We replace each anchor link in the source language with its corresponding entity title in the target language if it exists, or in the source language otherwise. A cross-lingual joint entity and word embedding learned from this kind of data not only can disambiguate linkable entities but can also effectively represent unlinkable entities. Because this multilingual common space directly relates the semantics of contextual words in the source language to that of entities in the target language, we leverage it for unsupervised cross-lingual entity linking. Experimental results show that CLEW significantly advances the state-of-the-art: up to 3.1% absolute F-score gain for unsupervised cross-lingual entity linking. Moreover, it provides reliable alignment on both the word/entity level and the sentence level, and thus we use it to mine parallel sentences for all (302 choose 2) language pairs in Wikipedia.</abstract>
    <identifier type="citekey">pan-etal-2019-cross</identifier>
    <identifier type="doi">10.18653/v1/D19-6107</identifier>
    <location>
        <url>https://aclanthology.org/D19-6107/</url>
    </location>
    <part>
        <date>2019-11</date>
        <extent unit="page">
            <start>56</start>
            <end>66</end>
        </extent>
    </part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Cross-lingual Joint Entity and Word Embedding to Improve Entity Linking and Parallel Sentence Mining
%A Pan, Xiaoman
%A Gowda, Thamme
%A Ji, Heng
%A May, Jonathan
%A Miller, Scott
%Y Cherry, Colin
%Y Durrett, Greg
%Y Foster, George
%Y Haffari, Reza
%Y Khadivi, Shahram
%Y Peng, Nanyun
%Y Ren, Xiang
%Y Swayamdipta, Swabha
%S Proceedings of the 2nd Workshop on Deep Learning Approaches for Low-Resource NLP (DeepLo 2019)
%D 2019
%8 November
%I Association for Computational Linguistics
%C Hong Kong, China
%F pan-etal-2019-cross
%X Entities, which refer to distinct objects in the real world, can be viewed as language universals and used as effective signals to generate less ambiguous semantic representations and align multiple languages. We propose a novel method, CLEW, to generate cross-lingual data that is a mix of entities and contextual words based on Wikipedia. We replace each anchor link in the source language with its corresponding entity title in the target language if it exists, or in the source language otherwise. A cross-lingual joint entity and word embedding learned from this kind of data not only can disambiguate linkable entities but can also effectively represent unlinkable entities. Because this multilingual common space directly relates the semantics of contextual words in the source language to that of entities in the target language, we leverage it for unsupervised cross-lingual entity linking. Experimental results show that CLEW significantly advances the state-of-the-art: up to 3.1% absolute F-score gain for unsupervised cross-lingual entity linking. Moreover, it provides reliable alignment on both the word/entity level and the sentence level, and thus we use it to mine parallel sentences for all (302 choose 2) language pairs in Wikipedia.
%R 10.18653/v1/D19-6107
%U https://aclanthology.org/D19-6107/
%U https://doi.org/10.18653/v1/D19-6107
%P 56-66
Markdown (Informal)
[Cross-lingual Joint Entity and Word Embedding to Improve Entity Linking and Parallel Sentence Mining](https://aclanthology.org/D19-6107/) (Pan et al., 2019)
ACL