% Bykh and Meurers, COLING 2016 (Technical Papers): label-informed feature
% grouping, explored in the context of Native Language Identification.
% Note: `month` uses the predefined `dec` macro so the bibliography style
% decides how the month is rendered.
@InProceedings{bykh-meurers:2016:COLING,
  author    = {Bykh, Serhiy and Meurers, Detmar},
  title     = {Advancing Linguistic Features and Insights by Label-informed Feature Grouping: An Exploration in the Context of Native Language Identification},
  booktitle = {Proceedings of {COLING} 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = dec,
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {739--749},
  abstract  = {We propose a hierarchical clustering approach designed to group linguistic
               features for supervised machine learning that is inspired by variationist
               linguistics. The method makes it possible to abstract away from the individual
               feature occurrences by grouping features together that behave alike with
               respect to the target class, thus providing a new, more general perspective on
               the data. On the one hand, it reduces data sparsity, leading to quantitative
               performance gains. On the other, it supports the formation and evaluation of
               hypotheses about individual choices of linguistic structures. We explore the
               method using features based on verb subcategorization information and evaluate
               the approach in the context of the Native Language Identification (NLI) task.},
  url       = {http://aclweb.org/anthology/C16-1071},
}

