@inproceedings{peterson-etal-2014-focusing,
title = "Focusing Annotation for Semantic Role Labeling",
author = "Peterson, Daniel and
Palmer, Martha and
Wu, Shumin",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Declerck, Thierry and
Loftsson, Hrafn and
Maegaard, Bente and
Mariani, Joseph and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Ninth International Conference on Language Resources and Evaluation ({LREC}'14)",
month = may,
year = "2014",
address = "Reykjavik, Iceland",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2014/pdf/977_Paper.pdf",
abstract = "Annotation of data is a time-consuming process, but necessary for many state-of-the-art solutions to NLP tasks, including semantic role labeling (SRL). In this paper, we show that language models may be used to select sentences that are more useful to annotate. We simulate a situation where only a portion of the available data can be annotated, and compare language model based selection against a more typical baseline of randomly selected data. The data is ordered using an off-the-shelf language modeling toolkit. We show that the least probable sentences provide dramatic improved system performance over the baseline, especially when only a small portion of the data is annotated. In fact, the lion{'}s share of the performance can be attained by annotating only 10-20{\%} of the data. This result holds for training a model based on new annotation, as well as when adding domain-specific annotation to a general corpus for domain adaptation.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="peterson-etal-2014-focusing">
<titleInfo>
<title>Focusing Annotation for Semantic Role Labeling</title>
</titleInfo>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Peterson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Martha</namePart>
<namePart type="family">Palmer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shumin</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2014-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC’14)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hrafn</namePart>
<namePart type="family">Loftsson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Reykjavik, Iceland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Annotation of data is a time-consuming process, but necessary for many state-of-the-art solutions to NLP tasks, including semantic role labeling (SRL). In this paper, we show that language models may be used to select sentences that are more useful to annotate. We simulate a situation where only a portion of the available data can be annotated, and compare language model based selection against a more typical baseline of randomly selected data. The data is ordered using an off-the-shelf language modeling toolkit. We show that the least probable sentences provide dramatic improved system performance over the baseline, especially when only a small portion of the data is annotated. In fact, the lion’s share of the performance can be attained by annotating only 10-20% of the data. This result holds for training a model based on new annotation, as well as when adding domain-specific annotation to a general corpus for domain adaptation.</abstract>
<identifier type="citekey">peterson-etal-2014-focusing</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/977_Paper.pdf</url>
</location>
<part>
<date>2014-05</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Focusing Annotation for Semantic Role Labeling
%A Peterson, Daniel
%A Palmer, Martha
%A Wu, Shumin
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Declerck, Thierry
%Y Loftsson, Hrafn
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC’14)
%D 2014
%8 May
%I European Language Resources Association (ELRA)
%C Reykjavik, Iceland
%F peterson-etal-2014-focusing
%X Annotation of data is a time-consuming process, but necessary for many state-of-the-art solutions to NLP tasks, including semantic role labeling (SRL). In this paper, we show that language models may be used to select sentences that are more useful to annotate. We simulate a situation where only a portion of the available data can be annotated, and compare language model based selection against a more typical baseline of randomly selected data. The data is ordered using an off-the-shelf language modeling toolkit. We show that the least probable sentences provide dramatic improved system performance over the baseline, especially when only a small portion of the data is annotated. In fact, the lion’s share of the performance can be attained by annotating only 10-20% of the data. This result holds for training a model based on new annotation, as well as when adding domain-specific annotation to a general corpus for domain adaptation.
%U http://www.lrec-conf.org/proceedings/lrec2014/pdf/977_Paper.pdf
Markdown (Informal)
[Focusing Annotation for Semantic Role Labeling](http://www.lrec-conf.org/proceedings/lrec2014/pdf/977_Paper.pdf) (Peterson et al., LREC 2014)
ACL
- Daniel Peterson, Martha Palmer, and Shumin Wu. 2014. Focusing Annotation for Semantic Role Labeling. In Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC'14), Reykjavik, Iceland. European Language Resources Association (ELRA).