% NLI Shared Task 2017 system description paper (team NLI-ISU), BEA workshop proceedings.
@InProceedings{vajjala-banerjee:2017:BEA,
  author    = {Vajjala, Sowmya and Banerjee, Sagnik},
  title     = {A study of N-gram and Embedding Representations for Native Language Identification},
  booktitle = {Proceedings of the 12th Workshop on Innovative Use of {NLP} for Building Educational Applications},
  month     = sep,
  year      = {2017},
  address   = {Copenhagen, Denmark},
  publisher = {Association for Computational Linguistics},
  pages     = {240--248},
  abstract  = {We report on our experiments with N-gram and embedding based feature
               representations for Native Language Identification (NLI) as a part of the NLI
               Shared Task 2017 (team name: NLI-ISU). Our best performing system on the test
               set for written essays had a macro F1 of 0.8264 and was based on word uni, bi
               and trigram features. We explored n-grams covering word, character, POS and
               word-POS mixed representations for this task. For embedding based feature
               representations, we employed both word and document embeddings. We had a
               relatively poor performance with all embedding representations compared to
               n-grams, which could be because of the fact that embeddings capture semantic
               similarities whereas L1 differences are more stylistic in nature.},
  url       = {http://www.aclweb.org/anthology/W17-5026},
}

