@inproceedings{madge-etal-2019-crowdsourcing,
title = "Crowdsourcing and Aggregating Nested Markable Annotations",
author = "Madge, Chris and
Yu, Juntao and
Chamberlain, Jon and
Kruschwitz, Udo and
Paun, Silviu and
Poesio, Massimo",
editor = "Korhonen, Anna and
Traum, David and
M{\`a}rquez, Llu{\'\i}s",
booktitle = "Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics",
month = jul,
year = "2019",
address = "Florence, Italy",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/P19-1077",
doi = "10.18653/v1/P19-1077",
pages = "797--807",
abstract = "One of the key steps in language resource creation is the identification of the text segments to be annotated, or markables, which depending on the task may vary from nominal chunks for named entity resolution to (potentially nested) noun phrases in coreference resolution (or mentions) to larger text segments in text segmentation. Markable identification is typically carried out semi-automatically, by running a markable identifier and correcting its output by hand{--}which is increasingly done via annotators recruited through crowdsourcing and aggregating their responses. In this paper, we present a method for identifying markables for coreference annotation that combines high-performance automatic markable detectors with checking with a Game-With-A-Purpose (GWAP) and aggregation using a Bayesian annotation model. The method was evaluated both on news data and data from a variety of other genres and results in an improvement on F1 of mention boundaries of over seven percentage points when compared with a state-of-the-art, domain-independent automatic mention detector, and almost three points over an in-domain mention detector. One of the key contributions of our proposal is its applicability to the case in which markables are nested, as is the case with coreference markables; but the GWAP and several of the proposed markable detectors are task and language-independent and are thus applicable to a variety of other annotation scenarios.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="madge-etal-2019-crowdsourcing">
<titleInfo>
<title>Crowdsourcing and Aggregating Nested Markable Annotations</title>
</titleInfo>
<name type="personal">
<namePart type="given">Chris</namePart>
<namePart type="family">Madge</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juntao</namePart>
<namePart type="family">Yu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jon</namePart>
<namePart type="family">Chamberlain</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Udo</namePart>
<namePart type="family">Kruschwitz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Silviu</namePart>
<namePart type="family">Paun</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Massimo</namePart>
<namePart type="family">Poesio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Korhonen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Traum</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lluís</namePart>
<namePart type="family">Màrquez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Florence, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>One of the key steps in language resource creation is the identification of the text segments to be annotated, or markables, which depending on the task may vary from nominal chunks for named entity resolution to (potentially nested) noun phrases in coreference resolution (or mentions) to larger text segments in text segmentation. Markable identification is typically carried out semi-automatically, by running a markable identifier and correcting its output by hand–which is increasingly done via annotators recruited through crowdsourcing and aggregating their responses. In this paper, we present a method for identifying markables for coreference annotation that combines high-performance automatic markable detectors with checking with a Game-With-A-Purpose (GWAP) and aggregation using a Bayesian annotation model. The method was evaluated both on news data and data from a variety of other genres and results in an improvement on F1 of mention boundaries of over seven percentage points when compared with a state-of-the-art, domain-independent automatic mention detector, and almost three points over an in-domain mention detector. One of the key contributions of our proposal is its applicability to the case in which markables are nested, as is the case with coreference markables; but the GWAP and several of the proposed markable detectors are task and language-independent and are thus applicable to a variety of other annotation scenarios.</abstract>
<identifier type="citekey">madge-etal-2019-crowdsourcing</identifier>
<identifier type="doi">10.18653/v1/P19-1077</identifier>
<location>
<url>https://aclanthology.org/P19-1077</url>
</location>
<part>
<date>2019-07</date>
<extent unit="page">
<start>797</start>
<end>807</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Crowdsourcing and Aggregating Nested Markable Annotations
%A Madge, Chris
%A Yu, Juntao
%A Chamberlain, Jon
%A Kruschwitz, Udo
%A Paun, Silviu
%A Poesio, Massimo
%Y Korhonen, Anna
%Y Traum, David
%Y Màrquez, Lluís
%S Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics
%D 2019
%8 July
%I Association for Computational Linguistics
%C Florence, Italy
%F madge-etal-2019-crowdsourcing
%X One of the key steps in language resource creation is the identification of the text segments to be annotated, or markables, which depending on the task may vary from nominal chunks for named entity resolution to (potentially nested) noun phrases in coreference resolution (or mentions) to larger text segments in text segmentation. Markable identification is typically carried out semi-automatically, by running a markable identifier and correcting its output by hand–which is increasingly done via annotators recruited through crowdsourcing and aggregating their responses. In this paper, we present a method for identifying markables for coreference annotation that combines high-performance automatic markable detectors with checking with a Game-With-A-Purpose (GWAP) and aggregation using a Bayesian annotation model. The method was evaluated both on news data and data from a variety of other genres and results in an improvement on F1 of mention boundaries of over seven percentage points when compared with a state-of-the-art, domain-independent automatic mention detector, and almost three points over an in-domain mention detector. One of the key contributions of our proposal is its applicability to the case in which markables are nested, as is the case with coreference markables; but the GWAP and several of the proposed markable detectors are task and language-independent and are thus applicable to a variety of other annotation scenarios.
%R 10.18653/v1/P19-1077
%U https://aclanthology.org/P19-1077
%U https://doi.org/10.18653/v1/P19-1077
%P 797-807
Markdown (Informal)
[Crowdsourcing and Aggregating Nested Markable Annotations](https://aclanthology.org/P19-1077) (Madge et al., ACL 2019)
ACL
- Chris Madge, Juntao Yu, Jon Chamberlain, Udo Kruschwitz, Silviu Paun, and Massimo Poesio. 2019. Crowdsourcing and Aggregating Nested Markable Annotations. In Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics, pages 797–807, Florence, Italy. Association for Computational Linguistics.