@inproceedings{colruyt-etal-2019-leveraging,
title = "Leveraging syntactic parsing to improve event annotation matching",
author = "Colruyt, Camiel and
De Clercq, Orph{\'e}e and
Hoste, V{\'e}ronique",
editor = "Paun, Silviu and
Hovy, Dirk",
booktitle = "Proceedings of the First Workshop on Aggregating and Analysing Crowdsourced Annotations for NLP",
month = nov,
year = "2019",
address = "Hong Kong",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/D19-5903/",
doi = "10.18653/v1/D19-5903",
pages = "15--23",
abstract = "Detecting event mentions is the first step in event extraction from text and annotating them is a notoriously difficult task. Evaluating annotator consistency is crucial when building datasets for mention detection. When event mentions are allowed to cover many tokens, annotators may disagree on their span, which means that overlapping annotations may then refer to the same event or to different events. This paper explores different fuzzy-matching functions which aim to resolve this ambiguity. The functions extract the sets of syntactic heads present in the annotations, use the Dice coefficient to measure the similarity between sets and return a judgment based on a given threshold. The functions are tested against the judgment of a human evaluator and a comparison is made between sets of tokens and sets of syntactic heads. The best-performing function is a head-based function that is found to agree with the human evaluator in 89{\%} of cases."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="colruyt-etal-2019-leveraging">
<titleInfo>
<title>Leveraging syntactic parsing to improve event annotation matching</title>
</titleInfo>
<name type="personal">
<namePart type="given">Camiel</namePart>
<namePart type="family">Colruyt</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Orphée</namePart>
<namePart type="family">De Clercq</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Véronique</namePart>
<namePart type="family">Hoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Aggregating and Analysing Crowdsourced Annotations for NLP</title>
</titleInfo>
<name type="personal">
<namePart type="given">Silviu</namePart>
<namePart type="family">Paun</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dirk</namePart>
<namePart type="family">Hovy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Hong Kong</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Detecting event mentions is the first step in event extraction from text and annotating them is a notoriously difficult task. Evaluating annotator consistency is crucial when building datasets for mention detection. When event mentions are allowed to cover many tokens, annotators may disagree on their span, which means that overlapping annotations may then refer to the same event or to different events. This paper explores different fuzzy-matching functions which aim to resolve this ambiguity. The functions extract the sets of syntactic heads present in the annotations, use the Dice coefficient to measure the similarity between sets and return a judgment based on a given threshold. The functions are tested against the judgment of a human evaluator and a comparison is made between sets of tokens and sets of syntactic heads. The best-performing function is a head-based function that is found to agree with the human evaluator in 89% of cases.</abstract>
<identifier type="citekey">colruyt-etal-2019-leveraging</identifier>
<identifier type="doi">10.18653/v1/D19-5903</identifier>
<location>
<url>https://aclanthology.org/D19-5903/</url>
</location>
<part>
<date>2019-11</date>
<extent unit="page">
<start>15</start>
<end>23</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Leveraging syntactic parsing to improve event annotation matching
%A Colruyt, Camiel
%A De Clercq, Orphée
%A Hoste, Véronique
%Y Paun, Silviu
%Y Hovy, Dirk
%S Proceedings of the First Workshop on Aggregating and Analysing Crowdsourced Annotations for NLP
%D 2019
%8 November
%I Association for Computational Linguistics
%C Hong Kong
%F colruyt-etal-2019-leveraging
%X Detecting event mentions is the first step in event extraction from text and annotating them is a notoriously difficult task. Evaluating annotator consistency is crucial when building datasets for mention detection. When event mentions are allowed to cover many tokens, annotators may disagree on their span, which means that overlapping annotations may then refer to the same event or to different events. This paper explores different fuzzy-matching functions which aim to resolve this ambiguity. The functions extract the sets of syntactic heads present in the annotations, use the Dice coefficient to measure the similarity between sets and return a judgment based on a given threshold. The functions are tested against the judgment of a human evaluator and a comparison is made between sets of tokens and sets of syntactic heads. The best-performing function is a head-based function that is found to agree with the human evaluator in 89% of cases.
%R 10.18653/v1/D19-5903
%U https://aclanthology.org/D19-5903/
%U https://doi.org/10.18653/v1/D19-5903
%P 15-23
Markdown (Informal)
[Leveraging syntactic parsing to improve event annotation matching](https://aclanthology.org/D19-5903/) (Colruyt et al., 2019)
ACL