@inproceedings{chakrabarty-etal-2017-context,
title = "Context Sensitive Lemmatization Using Two Successive Bidirectional Gated Recurrent Networks",
author = "Chakrabarty, Abhisek and
Pandit, Onkar Arun and
Garain, Utpal",
editor = "Barzilay, Regina and
Kan, Min-Yen",
booktitle = "Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = jul,
year = "2017",
address = "Vancouver, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/P17-1136",
doi = "10.18653/v1/P17-1136",
pages = "1481--1491",
abstract = "We introduce a composite deep neural network architecture for supervised and language independent context sensitive lemmatization. The proposed method considers the task as to identify the correct edit tree representing the transformation between a word-lemma pair. To find the lemma of a surface word, we exploit two successive bidirectional gated recurrent structures - the first one is used to extract the character level dependencies and the next one captures the contextual information of the given word. The key advantages of our model compared to the state-of-the-art lemmatizers such as Lemming and Morfette are - (i) it is independent of human decided features (ii) except the gold lemma, no other expensive morphological attribute is required for joint learning. We evaluate the lemmatizer on nine languages - Bengali, Catalan, Dutch, Hindi, Hungarian, Italian, Latin, Romanian and Spanish. It is found that except Bengali, the proposed method outperforms Lemming and Morfette on the other languages. To train the model on Bengali, we develop a gold lemma annotated dataset (having 1,702 sentences with a total of 20,257 word tokens), which is an additional contribution of this work.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="chakrabarty-etal-2017-context">
<titleInfo>
<title>Context Sensitive Lemmatization Using Two Successive Bidirectional Gated Recurrent Networks</title>
</titleInfo>
<name type="personal">
<namePart type="given">Abhisek</namePart>
<namePart type="family">Chakrabarty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Onkar</namePart>
<namePart type="given">Arun</namePart>
<namePart type="family">Pandit</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Utpal</namePart>
<namePart type="family">Garain</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Regina</namePart>
<namePart type="family">Barzilay</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vancouver, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We introduce a composite deep neural network architecture for supervised and language independent context sensitive lemmatization. The proposed method considers the task as to identify the correct edit tree representing the transformation between a word-lemma pair. To find the lemma of a surface word, we exploit two successive bidirectional gated recurrent structures - the first one is used to extract the character level dependencies and the next one captures the contextual information of the given word. The key advantages of our model compared to the state-of-the-art lemmatizers such as Lemming and Morfette are - (i) it is independent of human decided features (ii) except the gold lemma, no other expensive morphological attribute is required for joint learning. We evaluate the lemmatizer on nine languages - Bengali, Catalan, Dutch, Hindi, Hungarian, Italian, Latin, Romanian and Spanish. It is found that except Bengali, the proposed method outperforms Lemming and Morfette on the other languages. To train the model on Bengali, we develop a gold lemma annotated dataset (having 1,702 sentences with a total of 20,257 word tokens), which is an additional contribution of this work.</abstract>
<identifier type="citekey">chakrabarty-etal-2017-context</identifier>
<identifier type="doi">10.18653/v1/P17-1136</identifier>
<location>
<url>https://aclanthology.org/P17-1136</url>
</location>
<part>
<date>2017-07</date>
<extent unit="page">
<start>1481</start>
<end>1491</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Context Sensitive Lemmatization Using Two Successive Bidirectional Gated Recurrent Networks
%A Chakrabarty, Abhisek
%A Pandit, Onkar Arun
%A Garain, Utpal
%Y Barzilay, Regina
%Y Kan, Min-Yen
%S Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2017
%8 July
%I Association for Computational Linguistics
%C Vancouver, Canada
%F chakrabarty-etal-2017-context
%X We introduce a composite deep neural network architecture for supervised and language independent context sensitive lemmatization. The proposed method considers the task as to identify the correct edit tree representing the transformation between a word-lemma pair. To find the lemma of a surface word, we exploit two successive bidirectional gated recurrent structures - the first one is used to extract the character level dependencies and the next one captures the contextual information of the given word. The key advantages of our model compared to the state-of-the-art lemmatizers such as Lemming and Morfette are - (i) it is independent of human decided features (ii) except the gold lemma, no other expensive morphological attribute is required for joint learning. We evaluate the lemmatizer on nine languages - Bengali, Catalan, Dutch, Hindi, Hungarian, Italian, Latin, Romanian and Spanish. It is found that except Bengali, the proposed method outperforms Lemming and Morfette on the other languages. To train the model on Bengali, we develop a gold lemma annotated dataset (having 1,702 sentences with a total of 20,257 word tokens), which is an additional contribution of this work.
%R 10.18653/v1/P17-1136
%U https://aclanthology.org/P17-1136
%U https://doi.org/10.18653/v1/P17-1136
%P 1481-1491
Markdown (Informal)
[Context Sensitive Lemmatization Using Two Successive Bidirectional Gated Recurrent Networks](https://aclanthology.org/P17-1136) (Chakrabarty et al., ACL 2017)
ACL