% Workshop paper, ACL Anthology W17-2215 (LaTeCH-CLfL 2017).
% Notes: accented letters use the brace-wrapped "special character" form so
% classic BibTeX sorts and labels them correctly; proper nouns in the title are
% brace-protected against sentence-casing styles; month uses the predefined
% three-letter macro so the style controls its rendering.
% NOTE(review): address holds the conference venue (ACL Anthology convention),
% not the publisher's city -- confirm this is what the target style expects.
@InProceedings{ogrodniczuk-kopec:2017:LaTeCH-CLfL,
  author    = {Ogrodniczuk, Maciej and Kope{\'c}, Mateusz},
  title     = {Lexical Correction of {Polish} {Twitter} Political Data},
  booktitle = {Proceedings of the Joint {SIGHUM} Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature},
  month     = aug,
  year      = {2017},
  address   = {Vancouver, Canada},
  publisher = {Association for Computational Linguistics},
  pages     = {115--125},
  abstract  = {Language processing architectures are often evaluated in near-to-perfect
	conditions with respect to processed content. The tools which perform
	sufficiently well on electronic press, books and other type of non-interactive
	content may poorly handle littered, colloquial and multilingual textual data
	which make the majority of communication today. This paper aims at
	investigating how Polish Twitter data (in a slightly controlled `political'
	flavour) differs from expectation of linguistic tools and how they could be
	corrected to be ready for processing by standard language processing chains
	available for Polish. The setting includes specialised components for spelling
	correction of tweets as well as hashtag and username decoding.},
  url       = {http://www.aclweb.org/anthology/W17-2215}
}

