@inproceedings{bhat-etal-2014-towards,
    title = "Towards building a {Kashmiri} Treebank: Setting up the Annotation Pipeline",
    author = "Bhat, Riyaz Ahmad and
      Bhat, Shahid Mushtaq and
      Sharma, Dipti Misra",
    editor = "Calzolari, Nicoletta and
      Choukri, Khalid and
      Declerck, Thierry and
      Loftsson, Hrafn and
      Maegaard, Bente and
      Mariani, Joseph and
      Moreno, Asuncion and
      Odijk, Jan and
      Piperidis, Stelios",
    booktitle = "Proceedings of the Ninth International Conference on Language Resources and Evaluation ({LREC}'14)",
    month = may,
    year = "2014",
    address = "Reykjavik, Iceland",
    publisher = "European Language Resources Association (ELRA)",
    url = "http://www.lrec-conf.org/proceedings/lrec2014/pdf/18_Paper.pdf",
    pages = "748--752",
    abstract = "Kashmiri is a resource poor language with very less computational and language resources available for its text processing. As the main contribution of this paper, we present an initial version of the Kashmiri Dependency Treebank. The treebank consists of 1,000 sentences (17,462 tokens), annotated with part-of-speech (POS), chunk and dependency information. The treebank has been manually annotated using the Paninian Computational Grammar (PCG) formalism (Begum et al., 2008; Bharati et al., 2009). This version of Kashmiri treebank is an extension of its earlier version of 500 sentences (Bhat, 2012), a pilot experiment aimed at defining the annotation guidelines on a small subset of Kashmiri corpora. In this paper, we have refined the guidelines with some significant changes and have carried out inter-annotator agreement studies to ascertain its quality. We also present a dependency parsing pipeline, consisting of a tokenizer, a stemmer, a POS tagger, a chunker and an inter-chunk dependency parser. It, therefore, constitutes the first freely available, open source dependency parser of Kashmiri, setting the initial baseline for Kashmiri dependency parsing.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bhat-etal-2014-towards">
<titleInfo>
<title>Towards building a Kashmiri Treebank: Setting up the Annotation Pipeline</title>
</titleInfo>
<name type="personal">
<namePart type="given">Riyaz</namePart>
<namePart type="given">Ahmad</namePart>
<namePart type="family">Bhat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shahid</namePart>
<namePart type="given">Mushtaq</namePart>
<namePart type="family">Bhat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dipti</namePart>
<namePart type="given">Misra</namePart>
<namePart type="family">Sharma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2014-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC’14)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hrafn</namePart>
<namePart type="family">Loftsson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Reykjavik, Iceland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Kashmiri is a resource poor language with very less computational and language resources available for its text processing. As the main contribution of this paper, we present an initial version of the Kashmiri Dependency Treebank. The treebank consists of 1,000 sentences (17,462 tokens), annotated with part-of-speech (POS), chunk and dependency information. The treebank has been manually annotated using the Paninian Computational Grammar (PCG) formalism (Begum et al., 2008; Bharati et al., 2009). This version of Kashmiri treebank is an extension of its earlier version of 500 sentences (Bhat, 2012), a pilot experiment aimed at defining the annotation guidelines on a small subset of Kashmiri corpora. In this paper, we have refined the guidelines with some significant changes and have carried out inter-annotator agreement studies to ascertain its quality. We also present a dependency parsing pipeline, consisting of a tokenizer, a stemmer, a POS tagger, a chunker and an inter-chunk dependency parser. It, therefore, constitutes the first freely available, open source dependency parser of Kashmiri, setting the initial baseline for Kashmiri dependency parsing.</abstract>
<identifier type="citekey">bhat-etal-2014-towards</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/18_Paper.pdf</url>
</location>
<part>
<date>2014-05</date>
<extent unit="page">
<start>748</start>
<end>752</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Towards building a Kashmiri Treebank: Setting up the Annotation Pipeline
%A Bhat, Riyaz Ahmad
%A Bhat, Shahid Mushtaq
%A Sharma, Dipti Misra
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Declerck, Thierry
%Y Loftsson, Hrafn
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC’14)
%D 2014
%8 May
%I European Language Resources Association (ELRA)
%C Reykjavik, Iceland
%F bhat-etal-2014-towards
%X Kashmiri is a resource poor language with very less computational and language resources available for its text processing. As the main contribution of this paper, we present an initial version of the Kashmiri Dependency Treebank. The treebank consists of 1,000 sentences (17,462 tokens), annotated with part-of-speech (POS), chunk and dependency information. The treebank has been manually annotated using the Paninian Computational Grammar (PCG) formalism (Begum et al., 2008; Bharati et al., 2009). This version of Kashmiri treebank is an extension of its earlier version of 500 sentences (Bhat, 2012), a pilot experiment aimed at defining the annotation guidelines on a small subset of Kashmiri corpora. In this paper, we have refined the guidelines with some significant changes and have carried out inter-annotator agreement studies to ascertain its quality. We also present a dependency parsing pipeline, consisting of a tokenizer, a stemmer, a POS tagger, a chunker and an inter-chunk dependency parser. It, therefore, constitutes the first freely available, open source dependency parser of Kashmiri, setting the initial baseline for Kashmiri dependency parsing.
%U http://www.lrec-conf.org/proceedings/lrec2014/pdf/18_Paper.pdf
%P 748-752
Markdown (Informal)
[Towards building a Kashmiri Treebank: Setting up the Annotation Pipeline](http://www.lrec-conf.org/proceedings/lrec2014/pdf/18_Paper.pdf) (Bhat et al., LREC 2014)
ACL