@inproceedings{liu-etal-2025-towards-event,
title = "Towards Event Extraction with Massive Types: {LLM}-based Collaborative Annotation and Partitioning Extraction",
author = "Liu, Wenxuan and
Li, Zixuan and
Bai, Long and
Zuo, Yuxin and
Xu, Daozhu and
Jin, Xiaolong and
Guo, Jiafeng and
Cheng, Xueqi",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.emnlp-main.1743/",
doi = "10.18653/v1/2025.emnlp-main.1743",
pages = "34377--34399",
ISBN = "979-8-89176-332-6",
abstract = "Developing a general-purpose system that can extract events with massive types is a long-standing target in Event Extraction (EE). In doing so, the basic challenge comes from the absence of an efficient and effective annotation framework to construct the corresponding datasets. In this paper, we propose an LLM-based collaborative annotation framework. Through collaboration among multiple LLMs and a subsequent voting process, it refines annotations of triggers from distant supervision and then carries out argument annotation. Finally, we create EEMT, the largest EE dataset to date, featuring over **200,000** samples, **3,465** event types, and **6,297** role types. Evaluation on human-annotated test set demonstrates that the proposed framework achieves the F1 scores of **90.1{\%}** and **85.3{\%}** for event detection and argument extraction, strongly validating its effectiveness. Besides, to alleviate the excessively long prompts caused by massive types, we propose an LLM-based Partitioning method for EE called LLM-PEE. It first recalls candidate event types and then splits them into multiple partitions for LLMs to extract. After fine-tuning on the EEMT training set, the distilled LLM-PEE with 7B parameters outperforms state-of-the-art methods by **5.4{\%}** and **6.1{\%}** in event detection and argument extraction. Besides, it also surpasses mainstream LLMs by **12.9{\%}** on the unseen datasets, which strongly demonstrates the event diversity of the EEMT dataset and the generalization capabilities of the LLM-PEE method."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="liu-etal-2025-towards-event">
<titleInfo>
<title>Towards Event Extraction with Massive Types: LLM-based Collaborative Annotation and Partitioning Extraction</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wenxuan</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zixuan</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Long</namePart>
<namePart type="family">Bai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuxin</namePart>
<namePart type="family">Zuo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daozhu</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaolong</namePart>
<namePart type="family">Jin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiafeng</namePart>
<namePart type="family">Guo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xueqi</namePart>
<namePart type="family">Cheng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Christos</namePart>
<namePart type="family">Christodoulopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tanmoy</namePart>
<namePart type="family">Chakraborty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carolyn</namePart>
<namePart type="family">Rose</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Violet</namePart>
<namePart type="family">Peng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-332-6</identifier>
</relatedItem>
<abstract>Developing a general-purpose system that can extract events with massive types is a long-standing target in Event Extraction (EE). In doing so, the basic challenge comes from the absence of an efficient and effective annotation framework to construct the corresponding datasets. In this paper, we propose an LLM-based collaborative annotation framework. Through collaboration among multiple LLMs and a subsequent voting process, it refines annotations of triggers from distant supervision and then carries out argument annotation. Finally, we create EEMT, the largest EE dataset to date, featuring over **200,000** samples, **3,465** event types, and **6,297** role types. Evaluation on human-annotated test set demonstrates that the proposed framework achieves the F1 scores of **90.1%** and **85.3%** for event detection and argument extraction, strongly validating its effectiveness. Besides, to alleviate the excessively long prompts caused by massive types, we propose an LLM-based Partitioning method for EE called LLM-PEE. It first recalls candidate event types and then splits them into multiple partitions for LLMs to extract. After fine-tuning on the EEMT training set, the distilled LLM-PEE with 7B parameters outperforms state-of-the-art methods by **5.4%** and **6.1%** in event detection and argument extraction. Besides, it also surpasses mainstream LLMs by **12.9%** on the unseen datasets, which strongly demonstrates the event diversity of the EEMT dataset and the generalization capabilities of the LLM-PEE method.</abstract>
<identifier type="citekey">liu-etal-2025-towards-event</identifier>
<identifier type="doi">10.18653/v1/2025.emnlp-main.1743</identifier>
<location>
<url>https://aclanthology.org/2025.emnlp-main.1743/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>34377</start>
<end>34399</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Towards Event Extraction with Massive Types: LLM-based Collaborative Annotation and Partitioning Extraction
%A Liu, Wenxuan
%A Li, Zixuan
%A Bai, Long
%A Zuo, Yuxin
%A Xu, Daozhu
%A Jin, Xiaolong
%A Guo, Jiafeng
%A Cheng, Xueqi
%Y Christodoulopoulos, Christos
%Y Chakraborty, Tanmoy
%Y Rose, Carolyn
%Y Peng, Violet
%S Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-332-6
%F liu-etal-2025-towards-event
%X Developing a general-purpose system that can extract events with massive types is a long-standing target in Event Extraction (EE). In doing so, the basic challenge comes from the absence of an efficient and effective annotation framework to construct the corresponding datasets. In this paper, we propose an LLM-based collaborative annotation framework. Through collaboration among multiple LLMs and a subsequent voting process, it refines annotations of triggers from distant supervision and then carries out argument annotation. Finally, we create EEMT, the largest EE dataset to date, featuring over **200,000** samples, **3,465** event types, and **6,297** role types. Evaluation on human-annotated test set demonstrates that the proposed framework achieves the F1 scores of **90.1%** and **85.3%** for event detection and argument extraction, strongly validating its effectiveness. Besides, to alleviate the excessively long prompts caused by massive types, we propose an LLM-based Partitioning method for EE called LLM-PEE. It first recalls candidate event types and then splits them into multiple partitions for LLMs to extract. After fine-tuning on the EEMT training set, the distilled LLM-PEE with 7B parameters outperforms state-of-the-art methods by **5.4%** and **6.1%** in event detection and argument extraction. Besides, it also surpasses mainstream LLMs by **12.9%** on the unseen datasets, which strongly demonstrates the event diversity of the EEMT dataset and the generalization capabilities of the LLM-PEE method.
%R 10.18653/v1/2025.emnlp-main.1743
%U https://aclanthology.org/2025.emnlp-main.1743/
%U https://doi.org/10.18653/v1/2025.emnlp-main.1743
%P 34377-34399
Markdown (Informal)
[Towards Event Extraction with Massive Types: LLM-based Collaborative Annotation and Partitioning Extraction](https://aclanthology.org/2025.emnlp-main.1743/) (Liu et al., EMNLP 2025)
ACL
- Wenxuan Liu, Zixuan Li, Long Bai, Yuxin Zuo, Daozhu Xu, Xiaolong Jin, Jiafeng Guo, and Xueqi Cheng. 2025. Towards Event Extraction with Massive Types: LLM-based Collaborative Annotation and Partitioning Extraction. In Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing, pages 34377–34399, Suzhou, China. Association for Computational Linguistics.