% Workshop paper from WSSANLP 2016 (ACL Anthology W16-3701).
% Diacritics in the abstract are written as LaTeX accent commands throughout
% so the entry also works with classic 8-bit BibTeX.
@inproceedings{krishna-etal-2016-compound,
    author    = {Krishna, Amrith and
                 Satuluri, Pavankumar and
                 Sharma, Shubham and
                 Kumar, Apurv and
                 Goyal, Pawan},
    title     = {Compound Type Identification in {Sanskrit}: What Roles do the Corpus and Grammar Play?},
    editor    = {Wu, Dekai and
                 Bhattacharyya, Pushpak},
    booktitle = {Proceedings of the 6th Workshop on South and Southeast {Asian} Natural Language Processing ({WSSANLP}2016)},
    month     = dec,
    year      = {2016},
    address   = {Osaka, Japan},
    publisher = {The COLING 2016 Organizing Committee},
    url       = {https://aclanthology.org/W16-3701},
    pages     = {1--10},
    abstract  = {We propose a classification framework for semantic type identification of compounds in Sanskrit. We broadly classify the compounds into four different classes, namely \textit{Avyay{\=\i}bh{\=a}va}, \textit{Tatpuru{\d{s}}a}, \textit{Bahuvr{\=\i}hi} and \textit{Dvandva}. Our classification is based on the traditional classification system followed by the ancient grammar treatise \textit{A{\d{s}}{\d{t}}{\=a}dhy{\=a}y{\=\i}}, proposed by P{\=a}{\d{n}}ini 25 centuries back. We construct an elaborate feature space for our system by combining conditional rules from the grammar \textit{A{\d{s}}{\d{t}}{\=a}dhy{\=a}y{\=\i}}, semantic relations between the compound components from a lexical database \textit{Amarako{\d{s}}a} and linguistic structures from the data using Adaptor Grammars. Our in-depth analysis of the feature space highlights the inadequacy of \textit{A{\d{s}}{\d{t}}{\=a}dhy{\=a}y{\=\i}}, a generative grammar, in classifying the data samples. Our experimental results validate the effectiveness of using lexical databases as suggested by Amba Kulkarni and Anil Kumar, and put forward a new research direction by introducing linguistic patterns obtained from Adaptor Grammars for effective identification of compound type. We utilise an ensemble based approach, specifically designed for handling skewed datasets, and, experimenting with various classification methods, we achieve an overall accuracy of 0.77 using random forest classifiers.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="krishna-etal-2016-compound">
<titleInfo>
<title>Compound Type Identification in Sanskrit: What Roles do the Corpus and Grammar Play?</title>
</titleInfo>
<name type="personal">
<namePart type="given">Amrith</namePart>
<namePart type="family">Krishna</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pavankumar</namePart>
<namePart type="family">Satuluri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shubham</namePart>
<namePart type="family">Sharma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Apurv</namePart>
<namePart type="family">Kumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pawan</namePart>
<namePart type="family">Goyal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2016-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 6th Workshop on South and Southeast Asian Natural Language Processing (WSSANLP2016)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dekai</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pushpak</namePart>
<namePart type="family">Bhattacharyya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>The COLING 2016 Organizing Committee</publisher>
<place>
<placeTerm type="text">Osaka, Japan</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We propose a classification framework for semantic type identification of compounds in Sanskrit. We broadly classify the compounds into four different classes, namely Avyayībhāva, Tatpuruṣa, Bahuvrīhi and Dvandva. Our classification is based on the traditional classification system followed by the ancient grammar treatise Aṣṭādhyāyī, proposed by Pāṇini 25 centuries back. We construct an elaborate feature space for our system by combining conditional rules from the grammar Aṣṭādhyāyī, semantic relations between the compound components from a lexical database Amarakoṣa and linguistic structures from the data using Adaptor Grammars. Our in-depth analysis of the feature space highlights the inadequacy of Aṣṭādhyāyī, a generative grammar, in classifying the data samples. Our experimental results validate the effectiveness of using lexical databases as suggested by Amba Kulkarni and Anil Kumar, and put forward a new research direction by introducing linguistic patterns obtained from Adaptor Grammars for effective identification of compound type. We utilise an ensemble based approach, specifically designed for handling skewed datasets, and, experimenting with various classification methods, we achieve an overall accuracy of 0.77 using random forest classifiers.</abstract>
<identifier type="citekey">krishna-etal-2016-compound</identifier>
<location>
<url>https://aclanthology.org/W16-3701</url>
</location>
<part>
<date>2016-12</date>
<extent unit="page">
<start>1</start>
<end>10</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Compound Type Identification in Sanskrit: What Roles do the Corpus and Grammar Play?
%A Krishna, Amrith
%A Satuluri, Pavankumar
%A Sharma, Shubham
%A Kumar, Apurv
%A Goyal, Pawan
%Y Wu, Dekai
%Y Bhattacharyya, Pushpak
%S Proceedings of the 6th Workshop on South and Southeast Asian Natural Language Processing (WSSANLP2016)
%D 2016
%8 December
%I The COLING 2016 Organizing Committee
%C Osaka, Japan
%F krishna-etal-2016-compound
%X We propose a classification framework for semantic type identification of compounds in Sanskrit. We broadly classify the compounds into four different classes, namely Avyayībhāva, Tatpuruṣa, Bahuvrīhi and Dvandva. Our classification is based on the traditional classification system followed by the ancient grammar treatise Aṣṭādhyāyī, proposed by Pāṇini 25 centuries back. We construct an elaborate feature space for our system by combining conditional rules from the grammar Aṣṭādhyāyī, semantic relations between the compound components from a lexical database Amarakoṣa and linguistic structures from the data using Adaptor Grammars. Our in-depth analysis of the feature space highlights the inadequacy of Aṣṭādhyāyī, a generative grammar, in classifying the data samples. Our experimental results validate the effectiveness of using lexical databases as suggested by Amba Kulkarni and Anil Kumar, and put forward a new research direction by introducing linguistic patterns obtained from Adaptor Grammars for effective identification of compound type. We utilise an ensemble based approach, specifically designed for handling skewed datasets, and, experimenting with various classification methods, we achieve an overall accuracy of 0.77 using random forest classifiers.
%U https://aclanthology.org/W16-3701
%P 1-10
Markdown (Informal)
[Compound Type Identification in Sanskrit: What Roles do the Corpus and Grammar Play?](https://aclanthology.org/W16-3701) (Krishna et al., WSSANLP 2016)
ACL