% ACL Anthology W18-3008: workshop paper comparing named-entity representations for document classification.
@InProceedings{pivovarova-yangarber:2018:W18-30,
  author    = {Pivovarova, Lidia and Yangarber, Roman},
  title     = {Comparison of Representations of Named Entities for Document Classification},
  booktitle = {Proceedings of The Third Workshop on Representation Learning for {NLP}},
  month     = jul,
  year      = {2018},
  address   = {Melbourne, Australia},
  publisher = {Association for Computational Linguistics},
  pages     = {64--68},
  abstract  = {We explore representations for multi-word names in text classification tasks, on Reuters (RCV1) topic and sector classification. We find that: the best way to treat names is to split them into tokens and use each token as a separate feature; NEs have more impact on sector classification than topic classification; replacing NEs with entity types is not an effective strategy; representing tokens by different embeddings for proper names vs. common nouns does not improve results. We highlight the improvements over state-of-the-art results that our CNN models yield.},
  url       = {http://www.aclweb.org/anthology/W18-3008},
}

