@inproceedings{cai-oconnor-2023-evaluating,
title = "Evaluating Zero-Shot Event Structures: Recommendations for Automatic Content Extraction ({ACE}) Annotations",
author = "Cai, Erica and
O{'}Connor, Brendan",
editor = "Rogers, Anna and
Boyd-Graber, Jordan and
Okazaki, Naoaki",
booktitle = "Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.acl-short.142",
doi = "10.18653/v1/2023.acl-short.142",
pages = "1651--1665",
abstract = "Zero-shot event extraction (EE) methods infer richly structured event records from text, based only on a minimal user specification and no training examples, which enables flexibility in exploring and developing applications. Most event extraction research uses the Automatic Content Extraction (ACE) annotated dataset to evaluate supervised EE methods, but can it be used to evaluate zero-shot and other low-supervision EE? We describe ACE{'}s event structures and identify significant ambiguities and issues in current evaluation practice, including (1) coreferent argument mentions, (2) conflicting argument head conventions, and (3) ignorance of modality and event class details. By sometimes mishandling these subtleties, current work may dramatically understate the actual performance of zero-shot and other low-supervision EE, considering up to 32{\%} of correctly identified arguments and 25{\%} of correctly ignored event mentions as false negatives. For each issue, we propose recommendations for future evaluations so the research community can better utilize ACE as an event evaluation resource.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="cai-oconnor-2023-evaluating">
<titleInfo>
<title>Evaluating Zero-Shot Event Structures: Recommendations for Automatic Content Extraction (ACE) Annotations</title>
</titleInfo>
<name type="personal">
<namePart type="given">Erica</namePart>
<namePart type="family">Cai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Brendan</namePart>
<namePart type="family">O’Connor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Rogers</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jordan</namePart>
<namePart type="family">Boyd-Graber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Naoaki</namePart>
<namePart type="family">Okazaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Zero-shot event extraction (EE) methods infer richly structured event records from text, based only on a minimal user specification and no training examples, which enables flexibility in exploring and developing applications. Most event extraction research uses the Automatic Content Extraction (ACE) annotated dataset to evaluate supervised EE methods, but can it be used to evaluate zero-shot and other low-supervision EE? We describe ACE’s event structures and identify significant ambiguities and issues in current evaluation practice, including (1) coreferent argument mentions, (2) conflicting argument head conventions, and (3) ignorance of modality and event class details. By sometimes mishandling these subtleties, current work may dramatically understate the actual performance of zero-shot and other low-supervision EE, considering up to 32% of correctly identified arguments and 25% of correctly ignored event mentions as false negatives. For each issue, we propose recommendations for future evaluations so the research community can better utilize ACE as an event evaluation resource.</abstract>
<identifier type="citekey">cai-oconnor-2023-evaluating</identifier>
<identifier type="doi">10.18653/v1/2023.acl-short.142</identifier>
<location>
<url>https://aclanthology.org/2023.acl-short.142</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>1651</start>
<end>1665</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Evaluating Zero-Shot Event Structures: Recommendations for Automatic Content Extraction (ACE) Annotations
%A Cai, Erica
%A O’Connor, Brendan
%Y Rogers, Anna
%Y Boyd-Graber, Jordan
%Y Okazaki, Naoaki
%S Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F cai-oconnor-2023-evaluating
%X Zero-shot event extraction (EE) methods infer richly structured event records from text, based only on a minimal user specification and no training examples, which enables flexibility in exploring and developing applications. Most event extraction research uses the Automatic Content Extraction (ACE) annotated dataset to evaluate supervised EE methods, but can it be used to evaluate zero-shot and other low-supervision EE? We describe ACE’s event structures and identify significant ambiguities and issues in current evaluation practice, including (1) coreferent argument mentions, (2) conflicting argument head conventions, and (3) ignorance of modality and event class details. By sometimes mishandling these subtleties, current work may dramatically understate the actual performance of zero-shot and other low-supervision EE, considering up to 32% of correctly identified arguments and 25% of correctly ignored event mentions as false negatives. For each issue, we propose recommendations for future evaluations so the research community can better utilize ACE as an event evaluation resource.
%R 10.18653/v1/2023.acl-short.142
%U https://aclanthology.org/2023.acl-short.142
%U https://doi.org/10.18653/v1/2023.acl-short.142
%P 1651-1665
Markdown (Informal)
[Evaluating Zero-Shot Event Structures: Recommendations for Automatic Content Extraction (ACE) Annotations](https://aclanthology.org/2023.acl-short.142) (Cai & O’Connor, ACL 2023)
ACL