% Workshop paper (CL4LC, held with COLING 2016). The abstract is kept verbatim
% as quoted metadata; words in the title that must keep their capitalisation
% are brace-protected so sentence-casing styles do not lowercase them.
@InProceedings{falkenjack-jonsson:2016:CL4LC,
  author    = {Falkenjack, Johan and Jonsson, Arne},
  title     = {Implicit readability ranking using the latent variable of a {Bayesian} {Probit} model},
  booktitle = {Proceedings of the Workshop on Computational Linguistics for Linguistic Complexity ({CL4LC})},
  month     = dec,
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {104--112},
  abstract  = {Data driven approaches to readability analysis for languages other than English
	has been plagued by a scarcity of suitable corpora. Often, relevant corpora
	consist only of easy-to-read texts with no rank information or empirical
	readability scores, making only binary approaches, such as classification,
	applicable. We propose a Bayesian, latent variable, approach to get the most
	out of these kinds of corpora. In this paper we present results on using such a
	model for readability ranking. The model is evaluated on a preliminary corpus
	of ranked student texts with encouraging results. We also assess the model by
	showing that it performs readability classification on par with a state of the
	art classifier while at the same being transparent enough to allow more
	sophisticated interpretations.},
  url       = {http://aclweb.org/anthology/W16-4112},
}

