@inproceedings{chakrabarty-etal-2016-neural,
title = "A Neural Lemmatizer for {B}engali",
author = "Chakrabarty, Abhisek and
Chaturvedi, Akshay and
Garain, Utpal",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Declerck, Thierry and
Goggi, Sara and
Grobelnik, Marko and
Maegaard, Bente and
Mariani, Joseph and
Mazo, Helene and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Tenth International Conference on Language Resources and Evaluation ({LREC}'16)",
month = may,
year = "2016",
address = "Portoro{\v{z}}, Slovenia",
publisher = "European Language Resources Association (ELRA)",
url = "https://aclanthology.org/L16-1406",
pages = "2558--2561",
abstract = "We propose a novel neural lemmatization model which is language independent and supervised in nature. To handle the words in a neural framework, word embedding technique is used to represent words as vectors. The proposed lemmatizer makes use of contextual information of the surface word to be lemmatized. Given a word along with its contextual neighbours as input, the model is designed to produce the lemma of the concerned word as output. We introduce a new network architecture that permits only dimension specific connections between the input and the output layer of the model. For the present work, Bengali is taken as the reference language. Two datasets are prepared for training and testing purpose consisting of 19,159 and 2,126 instances respectively. As Bengali is a resource scarce language, these datasets would be beneficial for the respective research community. Evaluation method shows that the neural lemmatizer achieves 69.57{\%} accuracy on the test dataset and outperforms the simple cosine similarity based baseline strategy by a margin of 1.37{\%}.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="chakrabarty-etal-2016-neural">
<titleInfo>
<title>A Neural Lemmatizer for Bengali</title>
</titleInfo>
<name type="personal">
<namePart type="given">Abhisek</namePart>
<namePart type="family">Chakrabarty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Akshay</namePart>
<namePart type="family">Chaturvedi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Utpal</namePart>
<namePart type="family">Garain</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2016-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC’16)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Goggi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marko</namePart>
<namePart type="family">Grobelnik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Helene</namePart>
<namePart type="family">Mazo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Portorož, Slovenia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We propose a novel neural lemmatization model which is language independent and supervised in nature. To handle the words in a neural framework, word embedding technique is used to represent words as vectors. The proposed lemmatizer makes use of contextual information of the surface word to be lemmatized. Given a word along with its contextual neighbours as input, the model is designed to produce the lemma of the concerned word as output. We introduce a new network architecture that permits only dimension specific connections between the input and the output layer of the model. For the present work, Bengali is taken as the reference language. Two datasets are prepared for training and testing purpose consisting of 19,159 and 2,126 instances respectively. As Bengali is a resource scarce language, these datasets would be beneficial for the respective research community. Evaluation method shows that the neural lemmatizer achieves 69.57% accuracy on the test dataset and outperforms the simple cosine similarity based baseline strategy by a margin of 1.37%.</abstract>
<identifier type="citekey">chakrabarty-etal-2016-neural</identifier>
<location>
<url>https://aclanthology.org/L16-1406</url>
</location>
<part>
<date>2016-05</date>
<extent unit="page">
<start>2558</start>
<end>2561</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Neural Lemmatizer for Bengali
%A Chakrabarty, Abhisek
%A Chaturvedi, Akshay
%A Garain, Utpal
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Declerck, Thierry
%Y Goggi, Sara
%Y Grobelnik, Marko
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Mazo, Helene
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC’16)
%D 2016
%8 May
%I European Language Resources Association (ELRA)
%C Portorož, Slovenia
%F chakrabarty-etal-2016-neural
%X We propose a novel neural lemmatization model which is language independent and supervised in nature. To handle the words in a neural framework, word embedding technique is used to represent words as vectors. The proposed lemmatizer makes use of contextual information of the surface word to be lemmatized. Given a word along with its contextual neighbours as input, the model is designed to produce the lemma of the concerned word as output. We introduce a new network architecture that permits only dimension specific connections between the input and the output layer of the model. For the present work, Bengali is taken as the reference language. Two datasets are prepared for training and testing purpose consisting of 19,159 and 2,126 instances respectively. As Bengali is a resource scarce language, these datasets would be beneficial for the respective research community. Evaluation method shows that the neural lemmatizer achieves 69.57% accuracy on the test dataset and outperforms the simple cosine similarity based baseline strategy by a margin of 1.37%.
%U https://aclanthology.org/L16-1406
%P 2558-2561
Markdown (Informal)
[A Neural Lemmatizer for Bengali](https://aclanthology.org/L16-1406) (Chakrabarty et al., LREC 2016)
ACL
- Abhisek Chakrabarty, Akshay Chaturvedi, and Utpal Garain. 2016. A Neural Lemmatizer for Bengali. In Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC'16), pages 2558–2561, Portorož, Slovenia. European Language Resources Association (ELRA).