% Workshop paper entry. Percent signs inside the abstract are written as \%
% so that styles which typeset the abstract do not treat them as LaTeX comments.
@InProceedings{das-saha-sarkar:2016:WSSANLP2016,
  author    = {Das, Ayan and Saha, Agnivo and Sarkar, Sudeshna},
  title     = {Development of a {Bengali} parser by cross-lingual transfer from {Hindi}},
  booktitle = {Proceedings of the 6th Workshop on South and Southeast Asian Natural Language Processing (WSSANLP2016)},
  month     = dec,
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {33--43},
  abstract  = {In recent years there has been a lot of interest in cross-lingual parsing for
    developing treebanks for languages with small or no annotated treebanks. In
    this paper, we explore the development of a cross-lingual transfer parser from
    Hindi to Bengali using a Hindi parser and a Hindi-Bengali parallel corpus. A
    parser is trained and applied to the Hindi sentences of the parallel corpus
    and the parse trees are projected to construct probable parse trees of the
    corresponding Bengali sentences. Only about 14\% of these trees are complete
    (transferred trees contain all the target sentence words) and they are used to
    construct a Bengali parser. We relax the criteria of completeness to consider
    well-formed trees (43\% of the trees) leading to an improvement. We note
    that the words often do not have a one-to-one mapping in the two languages but
    considering sentences at the chunk-level results in better correspondence
    between the two languages. Based on this we present a method to use chunking as
    a preprocessing step and do the transfer on the chunk trees. We find that about
    72\% of the projected parse trees of Bengali are now well-formed. The resultant
    parser achieves significant improvement in both Unlabeled Attachment Score
    (UAS) as well as Labeled Attachment Score (LAS) over the baseline word-level
    transferred parser.},
  url       = {http://aclweb.org/anthology/W16-3704}
}