% Workshop paper from the WNUT 2016 proceedings. `month` uses the standard
% BibTeX month macro; percent signs in the abstract are escaped for LaTeX.
@InProceedings{belainine-fonseca-sadat:2016:WNUT,
  author    = {Belainine, Billal and Fonseca, Alexsandro and Sadat, Fatiha},
  title     = {Named Entity Recognition and Hashtag Decomposition to Improve the Classification of Tweets},
  booktitle = {Proceedings of the 2nd Workshop on Noisy User-generated Text ({WNUT})},
  month     = dec,
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {102--111},
  abstract  = {In social networks services like Twitter, users are overwhelmed
               with huge amount of social data, most of which are short,
               unstructured and highly noisy. Identifying accurate information
               from this huge amount of data is indeed a hard task.
               Classification of tweets into organized form will help the user
               to easily access these required information. Our first
               contribution relates to filtering parts of speech and
               preprocessing this kind of highly noisy and short data. Our
               second contribution concerns the named entity recognition (NER)
               in tweets. Thus, the adaptation of existing language tools for
               natural languages, noisy and not accurate language tweets, is
               necessary. Our third contribution involves segmentation of
               hashtags and a semantic enrichment using a combination of
               relations from WordNet, which helps the performance of our
               classification system, including disambiguation of named
               entities, abbreviations and acronyms. Graph theory is used to
               cluster the words extracted from WordNet and tweets, based on
               the idea of connected components. We test our automatic
               classification system with four categories: politics, economy,
               sports and the medical field. We evaluate and compare several
               automatic classification systems using part or all of the items
               described in our contributions and found that filtering by part
               of speech and named entity recognition dramatically increase the
               classification precision to 77.3 \%. Moreover, a classification
               system incorporating segmentation of hashtags and semantic
               enrichment by two relations from WordNet, synonymy and
               hyperonymy, increase classification precision up to 83.4 \%.},
  url       = {http://aclweb.org/anthology/W16-3915}
}

