% Workshop paper (EEUCA 2026, pages 26--37). Non-ASCII letters in names are
% written as brace-wrapped LaTeX accent commands so that classic 8-bit BibTeX
% treats each one as a single letter when sorting and building labels.
@inproceedings{hieu-etal-2026-constructing,
  title     = {Constructing a Silver Corpus for Weakly Supervised {Vietnamese} Event Extraction using Cross-Document N-ary Relation Filtering},
  author    = {Hi{\d{\^e}}u, Ph{\d{a}}m Xu{\^a}n and
               Minh, Tuan Vu and
               Tran, Mai-Vu and
               Le, Hoang-Quynh},
  editor    = {H{\"u}rriyeto{\u{g}}lu, Ali and
               Thapa, Surendrabikram and
               Tanev, Hristo},
  booktitle = {Proceedings of the 9th Workshop on Event Extraction and Understanding: Challenges and Applications ({EEUCA} 2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.eeuca-1.4/},
  pages     = {26--37},
  isbn      = {979-8-89176-402-6},
  abstract  = {Event extraction for low-resource languages such as Vietnamese is limited by the lack of large-scale annotated data. To address this, we propose a weakly supervised framework that constructs a silver corpus via pseudo-labeling. We introduce a cross-document n-ary relation filtering strategy to reduce noise by leveraging consistency across multiple articles describing the same event, and further enhance data diversity with schema-based augmentation. Experiments on the BKEE benchmark show consistent improvements, demonstrating the effectiveness of our approach. Data is available at: \url{https://github.com/Larken1612/VietEE2}.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS record for the paper with citekey hieu-etal-2026-constructing. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="hieu-etal-2026-constructing">
    <titleInfo>
      <title>Constructing a Silver Corpus for Weakly Supervised Vietnamese Event Extraction using Cross-Document N-ary Relation Filtering</title>
    </titleInfo>
    <!-- Authors, in byline order. -->
    <name type="personal">
      <namePart type="given">Phạm</namePart>
      <namePart type="given">Xuân</namePart>
      <namePart type="family">Hiệu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Tuan</namePart>
      <namePart type="given">Vu</namePart>
      <namePart type="family">Minh</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Mai-Vu</namePart>
      <namePart type="family">Tran</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Hoang-Quynh</namePart>
      <namePart type="family">Le</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors and publisher. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 9th Workshop on Event Extraction and Understanding: Challenges and Applications (EEUCA 2026)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Ali</namePart>
        <namePart type="family">Hürriyetoğlu</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Surendrabikram</namePart>
        <namePart type="family">Thapa</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Hristo</namePart>
        <namePart type="family">Tanev</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, California, USA</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-402-6</identifier>
    </relatedItem>
    <abstract>Event extraction for low-resource languages such as Vietnamese is limited by the lack of large-scale annotated data. To address this, we propose a weakly supervised framework that constructs a silver corpus via pseudo-labeling. We introduce a cross-document n-ary relation filtering strategy to reduce noise by leveraging consistency across multiple articles describing the same event, and further enhance data diversity with schema-based augmentation. Experiments on the BKEE benchmark show consistent improvements, demonstrating the effectiveness of our approach. Data is available at: https://github.com/Larken1612/VietEE2.</abstract>
    <identifier type="citekey">hieu-etal-2026-constructing</identifier>
    <location>
      <url>https://aclanthology.org/2026.eeuca-1.4/</url>
    </location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>26</start>
        <end>37</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Constructing a Silver Corpus for Weakly Supervised Vietnamese Event Extraction using Cross-Document N-ary Relation Filtering
%A Hiệu, Phạm Xuân
%A Minh, Tuan Vu
%A Tran, Mai-Vu
%A Le, Hoang-Quynh
%Y Hürriyetoğlu, Ali
%Y Thapa, Surendrabikram
%Y Tanev, Hristo
%S Proceedings of the 9th Workshop on Event Extraction and Understanding: Challenges and Applications (EEUCA 2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, USA
%@ 979-8-89176-402-6
%F hieu-etal-2026-constructing
%X Event extraction for low-resource languages such as Vietnamese is limited by the lack of large-scale annotated data. To address this, we propose a weakly supervised framework that constructs a silver corpus via pseudo-labeling. We introduce a cross-document n-ary relation filtering strategy to reduce noise by leveraging consistency across multiple articles describing the same event, and further enhance data diversity with schema-based augmentation. Experiments on the BKEE benchmark show consistent improvements, demonstrating the effectiveness of our approach. Data is available at: https://github.com/Larken1612/VietEE2.
%U https://aclanthology.org/2026.eeuca-1.4/
%P 26-37
Markdown (Informal)
[Constructing a Silver Corpus for Weakly Supervised Vietnamese Event Extraction using Cross-Document N-ary Relation Filtering](https://aclanthology.org/2026.eeuca-1.4/) (Hiệu et al., EEUCA 2026)
ACL