@inproceedings{ambati-etal-2010-high,
title = "A High Recall Error Identification Tool for {H}indi Treebank Validation",
author = "Ambati, Bharat Ram and
Gupta, Mridul and
Husain, Samar and
Sharma, Dipti Misra",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Maegaard, Bente and
Mariani, Joseph and
Odijk, Jan and
Piperidis, Stelios and
Rosner, Mike and
Tapias, Daniel",
booktitle = "Proceedings of the Seventh International Conference on Language Resources and Evaluation ({LREC}'10)",
month = may,
year = "2010",
address = "Valletta, Malta",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2010/pdf/673_Paper.pdf",
abstract = "This paper describes the development of a hybrid tool for a semi-automated process for validation of treebank annotation at various levels. The tool is developed for error detection at the part-of-speech, chunk and dependency levels of a Hindi treebank, currently under development. The tool aims to identify as many errors as possible at these levels to achieve consistency in the task of annotation. Consistency in treebank annotation is a must for making data as error-free as possible and for providing quality assurance. The tool is aimed at ensuring consistency and to make manual validation cost effective. We discuss a rule based and a hybrid approach (statistical methods combined with rule-based methods) by which a high-recall system can be developed and used to identify errors in the treebank. We report some results of using the tool on a sample of data extracted from the Hindi treebank. We also argue how the tool can prove useful in improving the annotation guidelines which would in turn, better the quality of annotation in subsequent iterations.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ambati-etal-2010-high">
<titleInfo>
<title>A High Recall Error Identification Tool for Hindi Treebank Validation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bharat</namePart>
<namePart type="given">Ram</namePart>
<namePart type="family">Ambati</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mridul</namePart>
<namePart type="family">Gupta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Samar</namePart>
<namePart type="family">Husain</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dipti</namePart>
<namePart type="given">Misra</namePart>
<namePart type="family">Sharma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2010-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC’10)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mike</namePart>
<namePart type="family">Rosner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Tapias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Valletta, Malta</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper describes the development of a hybrid tool for a semi-automated process for validation of treebank annotation at various levels. The tool is developed for error detection at the part-of-speech, chunk and dependency levels of a Hindi treebank, currently under development. The tool aims to identify as many errors as possible at these levels to achieve consistency in the task of annotation. Consistency in treebank annotation is a must for making data as error-free as possible and for providing quality assurance. The tool is aimed at ensuring consistency and to make manual validation cost effective. We discuss a rule based and a hybrid approach (statistical methods combined with rule-based methods) by which a high-recall system can be developed and used to identify errors in the treebank. We report some results of using the tool on a sample of data extracted from the Hindi treebank. We also argue how the tool can prove useful in improving the annotation guidelines which would in turn, better the quality of annotation in subsequent iterations.</abstract>
<identifier type="citekey">ambati-etal-2010-high</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/673_Paper.pdf</url>
</location>
<part>
<date>2010-05</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A High Recall Error Identification Tool for Hindi Treebank Validation
%A Ambati, Bharat Ram
%A Gupta, Mridul
%A Husain, Samar
%A Sharma, Dipti Misra
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Odijk, Jan
%Y Piperidis, Stelios
%Y Rosner, Mike
%Y Tapias, Daniel
%S Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC’10)
%D 2010
%8 May
%I European Language Resources Association (ELRA)
%C Valletta, Malta
%F ambati-etal-2010-high
%X This paper describes the development of a hybrid tool for a semi-automated process for validation of treebank annotation at various levels. The tool is developed for error detection at the part-of-speech, chunk and dependency levels of a Hindi treebank, currently under development. The tool aims to identify as many errors as possible at these levels to achieve consistency in the task of annotation. Consistency in treebank annotation is a must for making data as error-free as possible and for providing quality assurance. The tool is aimed at ensuring consistency and to make manual validation cost effective. We discuss a rule based and a hybrid approach (statistical methods combined with rule-based methods) by which a high-recall system can be developed and used to identify errors in the treebank. We report some results of using the tool on a sample of data extracted from the Hindi treebank. We also argue how the tool can prove useful in improving the annotation guidelines which would in turn, better the quality of annotation in subsequent iterations.
%U http://www.lrec-conf.org/proceedings/lrec2010/pdf/673_Paper.pdf
Markdown (Informal)
[A High Recall Error Identification Tool for Hindi Treebank Validation](http://www.lrec-conf.org/proceedings/lrec2010/pdf/673_Paper.pdf) (Ambati et al., LREC 2010)
ACL