% Conference paper in the COLING 2016 technical-papers proceedings.
% The month uses the standard BibTeX macro so that styles can localise it;
% the acronym in the booktitle is brace-protected against recasing.
@InProceedings{pate-johnson:2016:COLING,
  author    = {Pate, John K and Johnson, Mark},
  title     = {Grammar induction from (lots of) words alone},
  booktitle = {Proceedings of {COLING} 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = dec,
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {23--32},
  abstract  = {Grammar induction is the task of learning syntactic structure in a setting
	where that structure is hidden. Grammar induction from words alone is
	interesting because it is similar to the problem that a child learning a
	language faces. Previous work has typically assumed richer but cognitively
	implausible input, such as POS tag annotated data, which makes that work less
	relevant to human language acquisition. We show that grammar induction from
	words alone is in fact feasible when the model is provided with sufficient
	training data, and present two new streaming or mini-batch algorithms for PCFG
	inference that can learn from millions of words of training data. We compare
	the performance of these algorithms to a batch algorithm that learns from less
	data. The minibatch algorithms outperform the batch algorithm, showing that
	cheap inference with more data is better than intensive inference with less
	data. Additionally, we show that the harmonic initialiser, which previous work
	identified as essential when learning from small POS-tag annotated corpora
	(Klein and Manning, 2004), is not superior to a uniform initialisation.},
  url       = {http://aclweb.org/anthology/C16-1003}
}