@inproceedings{barrena-etal-2018-learning,
title = "Learning Text Representations for 500{K} Classification Tasks on Named Entity Disambiguation",
author = "Barrena, Ander and
Soroa, Aitor and
Agirre, Eneko",
editor = "Korhonen, Anna and
Titov, Ivan",
booktitle = "Proceedings of the 22nd Conference on Computational Natural Language Learning",
month = oct,
year = "2018",
address = "Brussels, Belgium",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/K18-1017",
doi = "10.18653/v1/K18-1017",
pages = "171--180",
abstract = "Named Entity Disambiguation algorithms typically learn a single model for all target entities. In this paper we present a word expert model and train separate deep learning models for each target entity string, yielding 500K classification tasks. This gives us the opportunity to benchmark popular text representation alternatives on this massive dataset. In order to face scarce training data we propose a simple data-augmentation technique and transfer-learning. We show that bag-of-word-embeddings are better than LSTMs for tasks with scarce training data, while the situation is reversed when having larger amounts. Transferring a LSTM which is learned on all datasets is the most effective context representation option for the word experts in all frequency bands. The experiments show that our system trained on out-of-domain Wikipedia data surpass comparable NED systems which have been trained on in-domain training data.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="barrena-etal-2018-learning">
    <titleInfo>
      <title>Learning Text Representations for 500K Classification Tasks on Named Entity Disambiguation</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Ander</namePart>
      <namePart type="family">Barrena</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Aitor</namePart>
      <namePart type="family">Soroa</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Eneko</namePart>
      <namePart type="family">Agirre</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2018-10</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 22nd Conference on Computational Natural Language Learning</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Anna</namePart>
        <namePart type="family">Korhonen</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ivan</namePart>
        <namePart type="family">Titov</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Brussels, Belgium</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Named Entity Disambiguation algorithms typically learn a single model for all target entities. In this paper we present a word expert model and train separate deep learning models for each target entity string, yielding 500K classification tasks. This gives us the opportunity to benchmark popular text representation alternatives on this massive dataset. In order to cope with scarce training data we propose a simple data-augmentation technique and transfer learning. We show that bag-of-word-embeddings are better than LSTMs for tasks with scarce training data, while the situation is reversed with larger amounts of data. Transferring an LSTM learned on all datasets is the most effective context representation option for the word experts in all frequency bands. The experiments show that our system trained on out-of-domain Wikipedia data surpasses comparable NED systems which have been trained on in-domain data.</abstract>
    <identifier type="citekey">barrena-etal-2018-learning</identifier>
    <identifier type="doi">10.18653/v1/K18-1017</identifier>
    <location>
      <url>https://aclanthology.org/K18-1017</url>
    </location>
    <part>
      <date>2018-10</date>
      <extent unit="page">
        <start>171</start>
        <end>180</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Learning Text Representations for 500K Classification Tasks on Named Entity Disambiguation
%A Barrena, Ander
%A Soroa, Aitor
%A Agirre, Eneko
%Y Korhonen, Anna
%Y Titov, Ivan
%S Proceedings of the 22nd Conference on Computational Natural Language Learning
%D 2018
%8 October
%I Association for Computational Linguistics
%C Brussels, Belgium
%F barrena-etal-2018-learning
%X Named Entity Disambiguation algorithms typically learn a single model for all target entities. In this paper we present a word expert model and train separate deep learning models for each target entity string, yielding 500K classification tasks. This gives us the opportunity to benchmark popular text representation alternatives on this massive dataset. In order to cope with scarce training data we propose a simple data-augmentation technique and transfer learning. We show that bag-of-word-embeddings are better than LSTMs for tasks with scarce training data, while the situation is reversed with larger amounts of data. Transferring an LSTM learned on all datasets is the most effective context representation option for the word experts in all frequency bands. The experiments show that our system trained on out-of-domain Wikipedia data surpasses comparable NED systems which have been trained on in-domain data.
%R 10.18653/v1/K18-1017
%U https://aclanthology.org/K18-1017
%U https://doi.org/10.18653/v1/K18-1017
%P 171-180
Markdown (Informal)
[Learning Text Representations for 500K Classification Tasks on Named Entity Disambiguation](https://aclanthology.org/K18-1017) (Barrena et al., CoNLL 2018)
ACL
Ander Barrena, Aitor Soroa, and Eneko Agirre. 2018. [Learning Text Representations for 500K Classification Tasks on Named Entity Disambiguation](https://aclanthology.org/K18-1017). In *Proceedings of the 22nd Conference on Computational Natural Language Learning*, pages 171–180, Brussels, Belgium. Association for Computational Linguistics.