@inproceedings{tomanek-hahn-2010-annotation,
title = "Annotation Time Stamps {---} Temporal Metadata from the Linguistic Annotation Process",
author = "Tomanek, Katrin and
Hahn, Udo",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Maegaard, Bente and
Mariani, Joseph and
Odijk, Jan and
Piperidis, Stelios and
Rosner, Mike and
Tapias, Daniel",
booktitle = "Proceedings of the Seventh International Conference on Language Resources and Evaluation ({LREC}'10)",
month = may,
year = "2010",
address = "Valletta, Malta",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2010/pdf/652_Paper.pdf",
abstract = "We describe the re-annotation of selected types of named entities (persons, organizations, locations) from the Muc7 corpus. The focus of this annotation initiative is on recording the time needed for the linguistic process of named entity annotation. Annotation times are measured on two basic annotation units -- sentences vs. complex noun phrases. We gathered evidence that decision times are non-uniformly distributed over the annotation units, while they do not substantially deviate among annotators. This data seems to support the hypothesis that annotation times very much depend on the inherent ''``hardness'''' of each single annotation decision. We further show how such time-stamped information can be used for empirically grounded studies of selective sampling techniques, such as Active Learning. We directly compare Active Learning costs on the basis of token-based vs. time-based measurements. The data reveals that Active Learning keeps its competitive advantage over random sampling in both scenarios though the difference is less marked for the time metric than for the token metric.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="tomanek-hahn-2010-annotation">
<titleInfo>
<title>Annotation Time Stamps — Temporal Metadata from the Linguistic Annotation Process</title>
</titleInfo>
<name type="personal">
<namePart type="given">Katrin</namePart>
<namePart type="family">Tomanek</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Udo</namePart>
<namePart type="family">Hahn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2010-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC’10)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mike</namePart>
<namePart type="family">Rosner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Tapias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Valletta, Malta</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We describe the re-annotation of selected types of named entities (persons, organizations, locations) from the Muc7 corpus. The focus of this annotation initiative is on recording the time needed for the linguistic process of named entity annotation. Annotation times are measured on two basic annotation units – sentences vs. complex noun phrases. We gathered evidence that decision times are non-uniformly distributed over the annotation units, while they do not substantially deviate among annotators. This data seems to support the hypothesis that annotation times very much depend on the inherent ”“hardness”” of each single annotation decision. We further show how such time-stamped information can be used for empirically grounded studies of selective sampling techniques, such as Active Learning. We directly compare Active Learning costs on the basis of token-based vs. time-based measurements. The data reveals that Active Learning keeps its competitive advantage over random sampling in both scenarios though the difference is less marked for the time metric than for the token metric.</abstract>
<identifier type="citekey">tomanek-hahn-2010-annotation</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/652_Paper.pdf</url>
</location>
<part>
<date>2010-05</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Annotation Time Stamps — Temporal Metadata from the Linguistic Annotation Process
%A Tomanek, Katrin
%A Hahn, Udo
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Odijk, Jan
%Y Piperidis, Stelios
%Y Rosner, Mike
%Y Tapias, Daniel
%S Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC’10)
%D 2010
%8 May
%I European Language Resources Association (ELRA)
%C Valletta, Malta
%F tomanek-hahn-2010-annotation
%X We describe the re-annotation of selected types of named entities (persons, organizations, locations) from the Muc7 corpus. The focus of this annotation initiative is on recording the time needed for the linguistic process of named entity annotation. Annotation times are measured on two basic annotation units – sentences vs. complex noun phrases. We gathered evidence that decision times are non-uniformly distributed over the annotation units, while they do not substantially deviate among annotators. This data seems to support the hypothesis that annotation times very much depend on the inherent ”“hardness”” of each single annotation decision. We further show how such time-stamped information can be used for empirically grounded studies of selective sampling techniques, such as Active Learning. We directly compare Active Learning costs on the basis of token-based vs. time-based measurements. The data reveals that Active Learning keeps its competitive advantage over random sampling in both scenarios though the difference is less marked for the time metric than for the token metric.
%U http://www.lrec-conf.org/proceedings/lrec2010/pdf/652_Paper.pdf
Markdown (Informal)
[Annotation Time Stamps — Temporal Metadata from the Linguistic Annotation Process](http://www.lrec-conf.org/proceedings/lrec2010/pdf/652_Paper.pdf) (Tomanek & Hahn, LREC 2010)
ACL