% EACL 2017 short paper introducing the URIEL knowledge base and the lang2vec utility.
% Text outside an entry is ignored by BibTeX, so these lines act as comments.
% The citation key is kept as exported so existing \cite commands keep resolving.
@inproceedings{littell-EtAl:2017:EACLshort,
  author    = {Littell, Patrick and Mortensen, David R. and Lin, Ke and Kairis, Katherine and Turner, Carlisle and Levin, Lori},
  title     = {{URIEL} and lang2vec: Representing languages as typological, geographical, and phylogenetic vectors},
  booktitle = {Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers},
  month     = apr,
  year      = {2017},
  address   = {Valencia, Spain},
  publisher = {Association for Computational Linguistics},
  pages     = {8--14},
  abstract  = {We introduce the URIEL knowledge base for massively multilingual NLP and the
	lang2vec utility, which provides information-rich vector identifications of
	languages drawn from typological, geographical, and phylogenetic databases and
	normalized to have straightforward and consistent formats, naming, and
	semantics.  The goal of URIEL and lang2vec is to enable multilingual NLP,
	especially on less-resourced languages and make possible types of experiments
	(especially but not exclusively related to NLP tasks) that are otherwise
	difficult or impossible due to the sparsity and incommensurability of the data
	sources.  lang2vec vectors have been shown to reduce perplexity in multilingual
	language modeling, when compared to one-hot language identification vectors.},
  url       = {http://www.aclweb.org/anthology/E17-2002}
}

