% Conference paper published in the LREC'08 proceedings (see booktitle).
% NOTE(review): address looks like the conference location rather than the
% publisher's city; confirm before relying on it as a publisher address.
@inproceedings{nath-etal-2008-unsupervised,
  title     = {Unsupervised Parts-of-Speech Induction for {Bengali}},
  author    = {Nath, Joydeep and
               Choudhury, Monojit and
               Mukherjee, Animesh and
               Biemann, Christian and
               Ganguly, Niloy},
  editor    = {Calzolari, Nicoletta and
               Choukri, Khalid and
               Maegaard, Bente and
               Mariani, Joseph and
               Odijk, Jan and
               Piperidis, Stelios and
               Tapias, Daniel},
  booktitle = {Proceedings of the Sixth International Conference on Language Resources and Evaluation ({LREC}'08)},
  month     = may,
  year      = {2008},
  address   = {Marrakech, Morocco},
  publisher = {European Language Resources Association (ELRA)},
  url       = {http://www.lrec-conf.org/proceedings/lrec2008/pdf/309_paper.pdf},
  abstract  = {We present a study of the word interaction networks of Bengali in the framework of complex networks. The topological properties of these networks reveal interesting insights into the morpho-syntax of the language, whereas clustering helps in the induction of the natural word classes leading to a principled way of designing POS tagsets. We compare different network construction techniques and clustering algorithms based on the cohesiveness of the word clusters. Cohesiveness is measured against two gold-standard tagsets by means of the novel metric of tag-entropy. The approach presented here is a generic one that can be easily extended to any language.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record: the paper itself, with the host proceedings
       (title, editors, publisher, place) nested under relatedItem. -->
  <mods ID="nath-etal-2008-unsupervised">
    <titleInfo>
      <title>Unsupervised Parts-of-Speech Induction for Bengali</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Joydeep</namePart>
      <namePart type="family">Nath</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Monojit</namePart>
      <namePart type="family">Choudhury</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Animesh</namePart>
      <namePart type="family">Mukherjee</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Christian</namePart>
      <namePart type="family">Biemann</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Niloy</namePart>
      <namePart type="family">Ganguly</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2008-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC’08)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Nicoletta</namePart>
        <namePart type="family">Calzolari</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Khalid</namePart>
        <namePart type="family">Choukri</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Bente</namePart>
        <namePart type="family">Maegaard</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Joseph</namePart>
        <namePart type="family">Mariani</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jan</namePart>
        <namePart type="family">Odijk</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Stelios</namePart>
        <namePart type="family">Piperidis</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Daniel</namePart>
        <namePart type="family">Tapias</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>European Language Resources Association (ELRA)</publisher>
        <place>
          <placeTerm type="text">Marrakech, Morocco</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>We present a study of the word interaction networks of Bengali in the framework of complex networks. The topological properties of these networks reveal interesting insights into the morpho-syntax of the language, whereas clustering helps in the induction of the natural word classes leading to a principled way of designing POS tagsets. We compare different network construction techniques and clustering algorithms based on the cohesiveness of the word clusters. Cohesiveness is measured against two gold-standard tagsets by means of the novel metric of tag-entropy. The approach presented here is a generic one that can be easily extended to any language.</abstract>
    <identifier type="citekey">nath-etal-2008-unsupervised</identifier>
    <location>
      <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/309_paper.pdf</url>
    </location>
    <part>
      <date>2008-05</date>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Unsupervised Parts-of-Speech Induction for Bengali
%A Nath, Joydeep
%A Choudhury, Monojit
%A Mukherjee, Animesh
%A Biemann, Christian
%A Ganguly, Niloy
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Odijk, Jan
%Y Piperidis, Stelios
%Y Tapias, Daniel
%S Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC’08)
%D 2008
%8 May
%I European Language Resources Association (ELRA)
%C Marrakech, Morocco
%F nath-etal-2008-unsupervised
%X We present a study of the word interaction networks of Bengali in the framework of complex networks. The topological properties of these networks reveal interesting insights into the morpho-syntax of the language, whereas clustering helps in the induction of the natural word classes leading to a principled way of designing POS tagsets. We compare different network construction techniques and clustering algorithms based on the cohesiveness of the word clusters. Cohesiveness is measured against two gold-standard tagsets by means of the novel metric of tag-entropy. The approach presented here is a generic one that can be easily extended to any language.
%U http://www.lrec-conf.org/proceedings/lrec2008/pdf/309_paper.pdf
Markdown (Informal)
[Unsupervised Parts-of-Speech Induction for Bengali](http://www.lrec-conf.org/proceedings/lrec2008/pdf/309_paper.pdf) (Nath et al., LREC 2008)
ACL
- Joydeep Nath, Monojit Choudhury, Animesh Mukherjee, Christian Biemann, and Niloy Ganguly. 2008. Unsupervised Parts-of-Speech Induction for Bengali. In Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC'08), Marrakech, Morocco. European Language Resources Association (ELRA).