@inproceedings{kan-etal-2024-emancipating,
title = "Emancipating Event Extraction from the Constraints of Long-Tailed Distribution Data Utilizing Large Language Models",
author = "Kan, Zhigang and
Peng, Liwen and
Qiao, Linbo and
Li, Dongsheng",
editor = "Calzolari, Nicoletta and
Kan, Min-Yen and
Hoste, Veronique and
Lenci, Alessandro and
Sakti, Sakriani and
Xue, Nianwen",
booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.lrec-main.501",
pages = "5644--5653",
abstract = "Event Extraction (EE) is a challenging task that aims to extract structural event-related information from unstructured text. Traditional methods for EE depend on manual annotations, which are both expensive and scarce. Furthermore, the existing datasets mostly follow the long-tail distribution, severely hindering the previous methods of modeling tail types. Two techniques can address this issue: transfer learning and data generation. However, the existing methods based on transfer learning still rely on pre-training with a large amount of labeled data in the source domain. Additionally, the quality of data generated by previous data generation methods is difficult to control. In this paper, leveraging Large Language Models (LLMs), we propose novel methods for event extraction and generation based on dialogues, overcoming the problems of relying on source domain data and maintaining data quality. Specifically, this paper innovatively transforms the EE task into multi-turn dialogues, guiding LLMs to learn event schemas from historical dialogue information and output structural events. Furthermore, we introduce a novel LLM-based method for generating high-quality data, significantly improving traditional models{'} performance with various paradigms and structures, especially on tail types. Adequate experiments on real-world datasets demonstrate the effectiveness of the proposed event extraction and data generation methods.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kan-etal-2024-emancipating">
<titleInfo>
<title>Emancipating Event Extraction from the Constraints of Long-Tailed Distribution Data Utilizing Large Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Zhigang</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Liwen</namePart>
<namePart type="family">Peng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Linbo</namePart>
<namePart type="family">Qiao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dongsheng</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronique</namePart>
<namePart type="family">Hoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Event Extraction (EE) is a challenging task that aims to extract structural event-related information from unstructured text. Traditional methods for EE depend on manual annotations, which are both expensive and scarce. Furthermore, the existing datasets mostly follow the long-tail distribution, severely hindering the previous methods of modeling tail types. Two techniques can address this issue: transfer learning and data generation. However, the existing methods based on transfer learning still rely on pre-training with a large amount of labeled data in the source domain. Additionally, the quality of data generated by previous data generation methods is difficult to control. In this paper, leveraging Large Language Models (LLMs), we propose novel methods for event extraction and generation based on dialogues, overcoming the problems of relying on source domain data and maintaining data quality. Specifically, this paper innovatively transforms the EE task into multi-turn dialogues, guiding LLMs to learn event schemas from historical dialogue information and output structural events. Furthermore, we introduce a novel LLM-based method for generating high-quality data, significantly improving traditional models’ performance with various paradigms and structures, especially on tail types. Adequate experiments on real-world datasets demonstrate the effectiveness of the proposed event extraction and data generation methods.</abstract>
<identifier type="citekey">kan-etal-2024-emancipating</identifier>
<location>
<url>https://aclanthology.org/2024.lrec-main.501</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>5644</start>
<end>5653</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Emancipating Event Extraction from the Constraints of Long-Tailed Distribution Data Utilizing Large Language Models
%A Kan, Zhigang
%A Peng, Liwen
%A Qiao, Linbo
%A Li, Dongsheng
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F kan-etal-2024-emancipating
%X Event Extraction (EE) is a challenging task that aims to extract structural event-related information from unstructured text. Traditional methods for EE depend on manual annotations, which are both expensive and scarce. Furthermore, the existing datasets mostly follow the long-tail distribution, severely hindering the previous methods of modeling tail types. Two techniques can address this issue: transfer learning and data generation. However, the existing methods based on transfer learning still rely on pre-training with a large amount of labeled data in the source domain. Additionally, the quality of data generated by previous data generation methods is difficult to control. In this paper, leveraging Large Language Models (LLMs), we propose novel methods for event extraction and generation based on dialogues, overcoming the problems of relying on source domain data and maintaining data quality. Specifically, this paper innovatively transforms the EE task into multi-turn dialogues, guiding LLMs to learn event schemas from historical dialogue information and output structural events. Furthermore, we introduce a novel LLM-based method for generating high-quality data, significantly improving traditional models’ performance with various paradigms and structures, especially on tail types. Adequate experiments on real-world datasets demonstrate the effectiveness of the proposed event extraction and data generation methods.
%U https://aclanthology.org/2024.lrec-main.501
%P 5644-5653
Markdown (Informal)
[Emancipating Event Extraction from the Constraints of Long-Tailed Distribution Data Utilizing Large Language Models](https://aclanthology.org/2024.lrec-main.501) (Kan et al., LREC-COLING 2024)
ACL