@inproceedings{poibeau-messiant-2008-still,
title = "Do we Still Need Gold Standards for Evaluation?",
author = "Poibeau, Thierry and
Messiant, C{\'e}dric",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Maegaard, Bente and
Mariani, Joseph and
Odijk, Jan and
Piperidis, Stelios and
Tapias, Daniel",
booktitle = "Proceedings of the Sixth International Conference on Language Resources and Evaluation ({LREC}'08)",
month = may,
year = "2008",
address = "Marrakech, Morocco",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2008/pdf/144_paper.pdf",
abstract = "The availability of a huge mass of textual data in electronic format has increased the need for fast and accurate techniques for textual data processing. Machine learning and statistical approaches have been increasingly used in NLP since a decade, mainly because they are quick, versatile and efficient. However, despite this evolution of the field, evaluation still rely (most of the time) on a comparison between the output of a probabilistic or statistical system on the one hand, and a non-statistic, most of the time hand-crafted, gold standard on the other hand. In this paper, we take the example of the acquisition of subcategorization frames from corpora as a practical example. Our study is motivated by the fact that, even if a gold standard is an invaluable resource for evaluation, a gold standard is always partial and does not really show how accurate and useful results are.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="poibeau-messiant-2008-still">
<titleInfo>
<title>Do we Still Need Gold Standards for Evaluation?</title>
</titleInfo>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Poibeau</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cédric</namePart>
<namePart type="family">Messiant</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2008-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC’08)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Tapias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Marrakech, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The availability of a huge mass of textual data in electronic format has increased the need for fast and accurate techniques for textual data processing. Machine learning and statistical approaches have been increasingly used in NLP since a decade, mainly because they are quick, versatile and efficient. However, despite this evolution of the field, evaluation still rely (most of the time) on a comparison between the output of a probabilistic or statistical system on the one hand, and a non-statistic, most of the time hand-crafted, gold standard on the other hand. In this paper, we take the example of the acquisition of subcategorization frames from corpora as a practical example. Our study is motivated by the fact that, even if a gold standard is an invaluable resource for evaluation, a gold standard is always partial and does not really show how accurate and useful results are.</abstract>
<identifier type="citekey">poibeau-messiant-2008-still</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/144_paper.pdf</url>
</location>
<part>
<date>2008-05</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Do we Still Need Gold Standards for Evaluation?
%A Poibeau, Thierry
%A Messiant, Cédric
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Odijk, Jan
%Y Piperidis, Stelios
%Y Tapias, Daniel
%S Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC’08)
%D 2008
%8 May
%I European Language Resources Association (ELRA)
%C Marrakech, Morocco
%F poibeau-messiant-2008-still
%X The availability of a huge mass of textual data in electronic format has increased the need for fast and accurate techniques for textual data processing. Machine learning and statistical approaches have been increasingly used in NLP since a decade, mainly because they are quick, versatile and efficient. However, despite this evolution of the field, evaluation still rely (most of the time) on a comparison between the output of a probabilistic or statistical system on the one hand, and a non-statistic, most of the time hand-crafted, gold standard on the other hand. In this paper, we take the example of the acquisition of subcategorization frames from corpora as a practical example. Our study is motivated by the fact that, even if a gold standard is an invaluable resource for evaluation, a gold standard is always partial and does not really show how accurate and useful results are.
%U http://www.lrec-conf.org/proceedings/lrec2008/pdf/144_paper.pdf
Markdown (Informal)
[Do we Still Need Gold Standards for Evaluation?](http://www.lrec-conf.org/proceedings/lrec2008/pdf/144_paper.pdf) (Poibeau & Messiant, LREC 2008)
ACL
- Thierry Poibeau and Cédric Messiant. 2008. Do we Still Need Gold Standards for Evaluation?. In Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC'08), Marrakech, Morocco. European Language Resources Association (ELRA).