@inproceedings{kumar-2014-developing,
title = "Developing Politeness Annotated Corpus of {H}indi Blogs",
author = "Kumar, Ritesh",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Declerck, Thierry and
Loftsson, Hrafn and
Maegaard, Bente and
Mariani, Joseph and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Ninth International Conference on Language Resources and Evaluation ({LREC}'14)",
month = may,
year = "2014",
address = "Reykjavik, Iceland",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2014/pdf/594_Paper.pdf",
pages = "1275--1280",
abstract = "In this paper I discuss the creation and annotation of a corpus of Hindi blogs. The corpus consists of a total of over 479,000 blog posts and blog comments. It is annotated with the information about the politeness level of each blog post and blog comment. The annotation is carried out using four levels of politeness ― neutral, appropriate, polite and impolite. For the annotation, three classifiers ― were trained and tested maximum entropy (MaxEnt), Support Vector Machines (SVM) and C4.5 - using around 30,000 manually annotated texts. Among these, C4.5 gave the best accuracy. It achieved an accuracy of around 78{\%} which is within 2{\%} of the human accuracy during annotation. Consequently this classifier is used to annotate the rest of the corpus",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kumar-2014-developing">
<titleInfo>
<title>Developing Politeness Annotated Corpus of Hindi Blogs</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ritesh</namePart>
<namePart type="family">Kumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2014-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC’14)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hrafn</namePart>
<namePart type="family">Loftsson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Reykjavik, Iceland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper I discuss the creation and annotation of a corpus of Hindi blogs. The corpus consists of a total of over 479,000 blog posts and blog comments. It is annotated with the information about the politeness level of each blog post and blog comment. The annotation is carried out using four levels of politeness ― neutral, appropriate, polite and impolite. For the annotation, three classifiers ― were trained and tested maximum entropy (MaxEnt), Support Vector Machines (SVM) and C4.5 - using around 30,000 manually annotated texts. Among these, C4.5 gave the best accuracy. It achieved an accuracy of around 78% which is within 2% of the human accuracy during annotation. Consequently this classifier is used to annotate the rest of the corpus</abstract>
<identifier type="citekey">kumar-2014-developing</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/594_Paper.pdf</url>
</location>
<part>
<date>2014-05</date>
<extent unit="page">
<start>1275</start>
<end>1280</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Developing Politeness Annotated Corpus of Hindi Blogs
%A Kumar, Ritesh
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Declerck, Thierry
%Y Loftsson, Hrafn
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC’14)
%D 2014
%8 May
%I European Language Resources Association (ELRA)
%C Reykjavik, Iceland
%F kumar-2014-developing
%X In this paper I discuss the creation and annotation of a corpus of Hindi blogs. The corpus consists of a total of over 479,000 blog posts and blog comments. It is annotated with the information about the politeness level of each blog post and blog comment. The annotation is carried out using four levels of politeness ― neutral, appropriate, polite and impolite. For the annotation, three classifiers ― were trained and tested maximum entropy (MaxEnt), Support Vector Machines (SVM) and C4.5 - using around 30,000 manually annotated texts. Among these, C4.5 gave the best accuracy. It achieved an accuracy of around 78% which is within 2% of the human accuracy during annotation. Consequently this classifier is used to annotate the rest of the corpus
%U http://www.lrec-conf.org/proceedings/lrec2014/pdf/594_Paper.pdf
%P 1275-1280
Markdown (Informal)
[Developing Politeness Annotated Corpus of Hindi Blogs](http://www.lrec-conf.org/proceedings/lrec2014/pdf/594_Paper.pdf) (Kumar, LREC 2014)
ACL