@inproceedings{lu-etal-2016-evaluating,
title = "Evaluating Ensemble Based Pre-annotation on Named Entity Corpus Construction in {E}nglish and {C}hinese",
author = "Lu, Tingming and
Zhu, Man and
Gao, Zhiqiang and
Gui, Yaocheng",
editor = "Murakami, Yohei and
Lin, Donghui and
Ide, Nancy and
Pustejovsky, James",
booktitle = "Proceedings of the Third International Workshop on Worldwide Language Service Infrastructure and Second Workshop on Open Infrastructures and Analysis Frameworks for Human Language Technologies ({WLSI}/{OIAF}4{HLT}2016)",
month = dec,
year = "2016",
address = "Osaka, Japan",
publisher = "The COLING 2016 Organizing Committee",
url = "https://aclanthology.org/W16-5208",
pages = "56--60",
abstract = "Annotated corpora are crucial language resources, and pre-annotation is an usual way to reduce the cost of corpus construction. Ensemble based pre-annotation approach combines multiple existing named entity taggers and categorizes annotations into normal annotations with high confidence and candidate annotations with low confidence, to reduce the human annotation time. In this paper, we manually annotate three English datasets under various pre-annotation conditions, report the effects of ensemble based pre-annotation, and analyze the experimental results. In order to verify the effectiveness of ensemble based pre-annotation in other languages, such as Chinese, three Chinese datasets are also tested. The experimental results show that the ensemble based pre-annotation approach significantly reduces the number of annotations which human annotators have to add, and outperforms the baseline approaches in reduction of human annotation time without loss in annotation performance (in terms of F1-measure), on both English and Chinese datasets.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="lu-etal-2016-evaluating">
<titleInfo>
<title>Evaluating Ensemble Based Pre-annotation on Named Entity Corpus Construction in English and Chinese</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tingming</namePart>
<namePart type="family">Lu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Man</namePart>
<namePart type="family">Zhu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhiqiang</namePart>
<namePart type="family">Gao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yaocheng</namePart>
<namePart type="family">Gui</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2016-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Third International Workshop on Worldwide Language Service Infrastructure and Second Workshop on Open Infrastructures and Analysis Frameworks for Human Language Technologies (WLSI/OIAF4HLT2016)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yohei</namePart>
<namePart type="family">Murakami</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Donghui</namePart>
<namePart type="family">Lin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nancy</namePart>
<namePart type="family">Ide</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">James</namePart>
<namePart type="family">Pustejovsky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>The COLING 2016 Organizing Committee</publisher>
<place>
<placeTerm type="text">Osaka, Japan</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Annotated corpora are crucial language resources, and pre-annotation is an usual way to reduce the cost of corpus construction. Ensemble based pre-annotation approach combines multiple existing named entity taggers and categorizes annotations into normal annotations with high confidence and candidate annotations with low confidence, to reduce the human annotation time. In this paper, we manually annotate three English datasets under various pre-annotation conditions, report the effects of ensemble based pre-annotation, and analyze the experimental results. In order to verify the effectiveness of ensemble based pre-annotation in other languages, such as Chinese, three Chinese datasets are also tested. The experimental results show that the ensemble based pre-annotation approach significantly reduces the number of annotations which human annotators have to add, and outperforms the baseline approaches in reduction of human annotation time without loss in annotation performance (in terms of F1-measure), on both English and Chinese datasets.</abstract>
<identifier type="citekey">lu-etal-2016-evaluating</identifier>
<location>
<url>https://aclanthology.org/W16-5208</url>
</location>
<part>
<date>2016-12</date>
<extent unit="page">
<start>56</start>
<end>60</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Evaluating Ensemble Based Pre-annotation on Named Entity Corpus Construction in English and Chinese
%A Lu, Tingming
%A Zhu, Man
%A Gao, Zhiqiang
%A Gui, Yaocheng
%Y Murakami, Yohei
%Y Lin, Donghui
%Y Ide, Nancy
%Y Pustejovsky, James
%S Proceedings of the Third International Workshop on Worldwide Language Service Infrastructure and Second Workshop on Open Infrastructures and Analysis Frameworks for Human Language Technologies (WLSI/OIAF4HLT2016)
%D 2016
%8 December
%I The COLING 2016 Organizing Committee
%C Osaka, Japan
%F lu-etal-2016-evaluating
%X Annotated corpora are crucial language resources, and pre-annotation is an usual way to reduce the cost of corpus construction. Ensemble based pre-annotation approach combines multiple existing named entity taggers and categorizes annotations into normal annotations with high confidence and candidate annotations with low confidence, to reduce the human annotation time. In this paper, we manually annotate three English datasets under various pre-annotation conditions, report the effects of ensemble based pre-annotation, and analyze the experimental results. In order to verify the effectiveness of ensemble based pre-annotation in other languages, such as Chinese, three Chinese datasets are also tested. The experimental results show that the ensemble based pre-annotation approach significantly reduces the number of annotations which human annotators have to add, and outperforms the baseline approaches in reduction of human annotation time without loss in annotation performance (in terms of F1-measure), on both English and Chinese datasets.
%U https://aclanthology.org/W16-5208
%P 56-60
Markdown (Informal)
[Evaluating Ensemble Based Pre-annotation on Named Entity Corpus Construction in English and Chinese](https://aclanthology.org/W16-5208) (Lu et al., OIAF4HLT 2016)
ACL