@comment{ACL Anthology W17-4103: workshop paper from SCLeM 2017. The abstract is kept verbatim; only its whitespace is normalised.}
@inproceedings{lejeune-cartier:2017:SCLeM,
  author    = {Lejeune, Ga{\"e}l and Cartier, Emmanuel},
  title     = {Character Based Pattern Mining for Neology Detection},
  booktitle = {Proceedings of the First Workshop on Subword and Character Level Models in {NLP}},
  month     = sep,
  year      = {2017},
  address   = {Copenhagen, Denmark},
  publisher = {Association for Computational Linguistics},
  pages     = {25--30},
  abstract  = {Detecting neologisms is essential in real-time natural language
               processing applications. Not only can it enable to follow the
               lexical evolution of languages, but it is also essential for
               updating linguistic resources and parsers. In this paper, neology
               detection is considered as a classification task where a system
               has to assess whether a given lexical item is an actual neologism
               or not. We propose a combination of an unsupervised data mining
               technique and a supervised machine learning approach. It is
               inspired by current researches in stylometry and on token-level
               and character-level patterns. We train and evaluate our system on
               a manually designed reference dataset in French and Russian. We
               show that this approach is able to largely outperform
               state-of-the-art neology detection systems. Furthermore,
               character-level patterns exhibit good properties for multilingual
               extensions of the system.},
  url       = {http://www.aclweb.org/anthology/W17-4103},
}

