% Conference paper from the LREC'08 proceedings. Values are brace-delimited; month uses the bare built-in macro.
@inproceedings{arehart-etal-2008-adjudicator,
  title     = {Adjudicator Agreement and System Rankings for Person Name Search},
  author    = {Arehart, Mark and Wolf, Chris and Miller, Keith J.},
  editor    = {Calzolari, Nicoletta and Choukri, Khalid and Maegaard, Bente and Mariani, Joseph and Odijk, Jan and Piperidis, Stelios and Tapias, Daniel},
  booktitle = {Proceedings of the Sixth International Conference on Language Resources and Evaluation ({LREC}'08)},
  month     = may,
  year      = {2008},
  address   = {Marrakech, Morocco},
  publisher = {European Language Resources Association (ELRA)},
  url       = {http://www.lrec-conf.org/proceedings/lrec2008/pdf/647_paper.pdf},
  abstract  = {We have analyzed system rankings for person name search algorithms using a data set for which several versions of ground truth were developed by employing different means of resolving adjudicator conflicts. Thirteen algorithms were ranked by F-score, using bootstrap resampling for significance testing, on a dataset containing 70,000 romanized names from various cultures. We found some disagreement among the four adjudicators, with kappa ranging from 0.57 to 0.78. Truth sets based on a single adjudicator, and on the intersection or union of positive adjudications produced sizeable variability in scoring sensitivity - and to a lesser degree rank order - compared to the consensus truth set. However, results on truth sets constructed by randomly choosing an adjudicator for each item were highly consistent with the consensus. The implication is that an evaluation where one adjudicator has judged each item is nearly as good as a more expensive and labor-intensive one where multiple adjudicators have judged each item and conflicts are resolved through voting.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for a single conference paper; the proceedings volume is described in the host relatedItem. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="arehart-etal-2008-adjudicator">
    <titleInfo>
      <title>Adjudicator Agreement and System Rankings for Person Name Search</title>
    </titleInfo>
    <!-- Paper authors, in byline order -->
    <name type="personal">
      <namePart type="given">Mark</namePart>
      <namePart type="family">Arehart</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Chris</namePart>
      <namePart type="family">Wolf</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Keith</namePart>
      <namePart type="given">J</namePart>
      <namePart type="family">Miller</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2008-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors, publisher and place -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC’08)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Nicoletta</namePart>
        <namePart type="family">Calzolari</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Khalid</namePart>
        <namePart type="family">Choukri</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Bente</namePart>
        <namePart type="family">Maegaard</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Joseph</namePart>
        <namePart type="family">Mariani</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jan</namePart>
        <namePart type="family">Odijk</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Stelios</namePart>
        <namePart type="family">Piperidis</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Daniel</namePart>
        <namePart type="family">Tapias</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>European Language Resources Association (ELRA)</publisher>
        <place>
          <placeTerm type="text">Marrakech, Morocco</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>We have analyzed system rankings for person name search algorithms using a data set for which several versions of ground truth were developed by employing different means of resolving adjudicator conflicts. Thirteen algorithms were ranked by F-score, using bootstrap resampling for significance testing, on a dataset containing 70,000 romanized names from various cultures. We found some disagreement among the four adjudicators, with kappa ranging from 0.57 to 0.78. Truth sets based on a single adjudicator, and on the intersection or union of positive adjudications produced sizeable variability in scoring sensitivity - and to a lesser degree rank order - compared to the consensus truth set. However, results on truth sets constructed by randomly choosing an adjudicator for each item were highly consistent with the consensus. The implication is that an evaluation where one adjudicator has judged each item is nearly as good as a more expensive and labor-intensive one where multiple adjudicators have judged each item and conflicts are resolved through voting.</abstract>
    <!-- Citation key shared with the BibTeX export -->
    <identifier type="citekey">arehart-etal-2008-adjudicator</identifier>
    <location>
      <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/647_paper.pdf</url>
    </location>
    <part>
      <date>2008-05</date>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Adjudicator Agreement and System Rankings for Person Name Search
%A Arehart, Mark
%A Wolf, Chris
%A Miller, Keith J.
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Odijk, Jan
%Y Piperidis, Stelios
%Y Tapias, Daniel
%S Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC’08)
%D 2008
%8 May
%I European Language Resources Association (ELRA)
%C Marrakech, Morocco
%F arehart-etal-2008-adjudicator
%X We have analyzed system rankings for person name search algorithms using a data set for which several versions of ground truth were developed by employing different means of resolving adjudicator conflicts. Thirteen algorithms were ranked by F-score, using bootstrap resampling for significance testing, on a dataset containing 70,000 romanized names from various cultures. We found some disagreement among the four adjudicators, with kappa ranging from 0.57 to 0.78. Truth sets based on a single adjudicator, and on the intersection or union of positive adjudications produced sizeable variability in scoring sensitivity - and to a lesser degree rank order - compared to the consensus truth set. However, results on truth sets constructed by randomly choosing an adjudicator for each item were highly consistent with the consensus. The implication is that an evaluation where one adjudicator has judged each item is nearly as good as a more expensive and labor-intensive one where multiple adjudicators have judged each item and conflicts are resolved through voting.
%U http://www.lrec-conf.org/proceedings/lrec2008/pdf/647_paper.pdf
Markdown (Informal)
[Adjudicator Agreement and System Rankings for Person Name Search](http://www.lrec-conf.org/proceedings/lrec2008/pdf/647_paper.pdf) (Arehart et al., LREC 2008)
ACL