% Bhat, Bhat and Sharma, IWPT 2017 (Pisa): parsing conversational Hindi data
% (movie scripts, tweets) with a parser trained on a newswire treebank.
% The month uses the predefined macro; the percent sign in the abstract is
% escaped so that styles printing the abstract do not start a TeX comment.
@InProceedings{bhat-bhat-sharma:2017:IWPT,
  author    = {Bhat, Riyaz A. and Bhat, Irshad and Sharma, Dipti},
  title     = {Leveraging Newswire Treebanks for Parsing Conversational Data with Argument Scrambling},
  booktitle = {Proceedings of the 15th International Conference on Parsing Technologies},
  month     = sep,
  year      = {2017},
  address   = {Pisa, Italy},
  publisher = {Association for Computational Linguistics},
  pages     = {61--66},
  abstract  = {We investigate the problem of parsing conversational data of
    morphologically-rich languages such as Hindi where argument scrambling occurs
    frequently. We evaluate a state-of-the-art non-linear transition-based parsing
    system on a new dataset containing 506 dependency trees for sentences from
    Bollywood (Hindi) movie scripts and Twitter posts of Hindi monolingual
    speakers. We show that a dependency parser trained on a newswire treebank is
    strongly biased towards the canonical structures and degrades when applied to
    conversational data. Inspired by Transformational Generative Grammar (Chomsky,
    1965), we mitigate the sampling bias by generating all theoretically possible
    alternative word orders of a clause from the existing (kernel) structures in
    the treebank. Training our parser on canonical and transformed structures
    improves performance on conversational data by around 9\% LAS over the baseline
    newswire parser.},
  url       = {http://www.aclweb.org/anthology/W17-6309}
}

