% ACL 2017 long paper on context-sensitive lemmatization.
% The `address` field holds the conference location as given in the original record.
@InProceedings{chakrabarty-pandit-garain:2017:Long,
  author    = {Chakrabarty, Abhisek and Pandit, Onkar Arun and Garain, Utpal},
  title     = {Context Sensitive Lemmatization Using Two Successive Bidirectional Gated Recurrent Networks},
  booktitle = {Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = jul,
  year      = {2017},
  address   = {Vancouver, Canada},
  publisher = {Association for Computational Linguistics},
  pages     = {1481--1491},
  abstract  = {We introduce a composite deep neural network architecture for supervised and
    language independent context sensitive lemmatization. The proposed method
    considers the task as to identify the correct edit tree representing the
    transformation between a word-lemma pair. To find the lemma of a surface word,
    we exploit two successive bidirectional gated recurrent structures --- the first
    one is used to extract the character level dependencies and the next one
    captures the contextual information of the given word. The key advantages of
    our model compared to the state-of-the-art lemmatizers such as Lemming and
    Morfette are --- (i) it is independent of human decided features (ii) except the
    gold lemma, no other expensive morphological attribute is required for joint
    learning. We evaluate the lemmatizer on nine languages --- Bengali, Catalan,
    Dutch, Hindi, Hungarian, Italian, Latin, Romanian and Spanish. It is found that
    except Bengali, the proposed method outperforms Lemming and Morfette on the
    other languages. To train the model on Bengali, we develop a gold lemma
    annotated dataset (having 1,702 sentences with a total of 20,257 word tokens),
    which is an additional contribution of this work.},
  doi       = {10.18653/v1/P17-1136},
  url       = {https://aclanthology.org/P17-1136}
}

