@inproceedings{blissett-ji-2019-cross,
title = "Cross-lingual {NIL} Entity Clustering for Low-resource Languages",
author = "Blissett, Kevin and
Ji, Heng",
editor = "Ogrodniczuk, Maciej and
Pradhan, Sameer and
Grishina, Yulia and
Ng, Vincent",
booktitle = "Proceedings of the Second Workshop on Computational Models of Reference, Anaphora and Coreference",
month = jun,
year = "2019",
address = "Minneapolis, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W19-2804",
doi = "10.18653/v1/W19-2804",
pages = "20--25",
abstract = "Clustering unlinkable entity mentions across documents in multiple languages (cross-lingual NIL Clustering) is an important task as part of Entity Discovery and Linking (EDL). This task has been largely neglected by the EDL community because it is challenging to outperform simple edit distance or other heuristics based baselines. We propose a novel approach based on encoding the orthographic similarity of the mentions using a Recurrent Neural Network (RNN) architecture. Our model adapts a training procedure from the one-shot facial recognition literature in order to achieve this. We also perform several exploratory probing tasks on our name encodings in order to determine what specific types of information are likely to be encoded by our model. Experiments show our approach provides up to a 6.6{\%} absolute CEAFm F-Score improvement over state-of-the-art methods and successfully captures phonological relations across languages.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="blissett-ji-2019-cross">
<titleInfo>
<title>Cross-lingual NIL Entity Clustering for Low-resource Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kevin</namePart>
<namePart type="family">Blissett</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Heng</namePart>
<namePart type="family">Ji</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Second Workshop on Computational Models of Reference, Anaphora and Coreference</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maciej</namePart>
<namePart type="family">Ogrodniczuk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sameer</namePart>
<namePart type="family">Pradhan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yulia</namePart>
<namePart type="family">Grishina</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vincent</namePart>
<namePart type="family">Ng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Minneapolis, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Clustering unlinkable entity mentions across documents in multiple languages (cross-lingual NIL Clustering) is an important task as part of Entity Discovery and Linking (EDL). This task has been largely neglected by the EDL community because it is challenging to outperform simple edit distance or other heuristics based baselines. We propose a novel approach based on encoding the orthographic similarity of the mentions using a Recurrent Neural Network (RNN) architecture. Our model adapts a training procedure from the one-shot facial recognition literature in order to achieve this. We also perform several exploratory probing tasks on our name encodings in order to determine what specific types of information are likely to be encoded by our model. Experiments show our approach provides up to a 6.6% absolute CEAFm F-Score improvement over state-of-the-art methods and successfully captures phonological relations across languages.</abstract>
<identifier type="citekey">blissett-ji-2019-cross</identifier>
<identifier type="doi">10.18653/v1/W19-2804</identifier>
<location>
<url>https://aclanthology.org/W19-2804</url>
</location>
<part>
<date>2019-06</date>
<extent unit="page">
<start>20</start>
<end>25</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Cross-lingual NIL Entity Clustering for Low-resource Languages
%A Blissett, Kevin
%A Ji, Heng
%Y Ogrodniczuk, Maciej
%Y Pradhan, Sameer
%Y Grishina, Yulia
%Y Ng, Vincent
%S Proceedings of the Second Workshop on Computational Models of Reference, Anaphora and Coreference
%D 2019
%8 June
%I Association for Computational Linguistics
%C Minneapolis, USA
%F blissett-ji-2019-cross
%X Clustering unlinkable entity mentions across documents in multiple languages (cross-lingual NIL Clustering) is an important task as part of Entity Discovery and Linking (EDL). This task has been largely neglected by the EDL community because it is challenging to outperform simple edit distance or other heuristics based baselines. We propose a novel approach based on encoding the orthographic similarity of the mentions using a Recurrent Neural Network (RNN) architecture. Our model adapts a training procedure from the one-shot facial recognition literature in order to achieve this. We also perform several exploratory probing tasks on our name encodings in order to determine what specific types of information are likely to be encoded by our model. Experiments show our approach provides up to a 6.6% absolute CEAFm F-Score improvement over state-of-the-art methods and successfully captures phonological relations across languages.
%R 10.18653/v1/W19-2804
%U https://aclanthology.org/W19-2804
%U https://doi.org/10.18653/v1/W19-2804
%P 20-25
Markdown (Informal)
[Cross-lingual NIL Entity Clustering for Low-resource Languages](https://aclanthology.org/W19-2804) (Blissett & Ji, CRAC 2019)
ACL