@inproceedings{nguyen-etal-2016-dataset,
title = "A Dataset for Open Event Extraction in {E}nglish",
author = "Nguyen, Kiem-Hieu and
Tannier, Xavier and
Ferret, Olivier and
Besan{\c{c}}on, Romaric",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Declerck, Thierry and
Goggi, Sara and
Grobelnik, Marko and
Maegaard, Bente and
Mariani, Joseph and
Mazo, Helene and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Tenth International Conference on Language Resources and Evaluation ({LREC}'16)",
month = may,
year = "2016",
address = "Portoro{\v{z}}, Slovenia",
publisher = "European Language Resources Association (ELRA)",
url = "https://aclanthology.org/L16-1307",
pages = "1939--1943",
abstract = "This article presents a corpus for development and testing of event schema induction systems in English. Schema induction is the task of learning templates with no supervision from unlabeled texts, and to group together entities corresponding to the same role in a template. Most of the previous work on this subject relies on the MUC-4 corpus. We describe the limits of using this corpus (size, non-representativeness, similarity of roles across templates) and propose a new, partially-annotated corpus in English which remedies some of these shortcomings. We make use of Wikinews to select the data inside the category Laws {\&} Justice, and query Google search engine to retrieve different documents on the same events. Only Wikinews documents are manually annotated and can be used for evaluation, while the others can be used for unsupervised learning. We detail the methodology used for building the corpus and evaluate some existing systems on this new data.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="nguyen-etal-2016-dataset">
<titleInfo>
<title>A Dataset for Open Event Extraction in English</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kiem-Hieu</namePart>
<namePart type="family">Nguyen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xavier</namePart>
<namePart type="family">Tannier</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Olivier</namePart>
<namePart type="family">Ferret</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Romaric</namePart>
<namePart type="family">Besançon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2016-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC’16)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Goggi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marko</namePart>
<namePart type="family">Grobelnik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Helene</namePart>
<namePart type="family">Mazo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Portorož, Slovenia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This article presents a corpus for development and testing of event schema induction systems in English. Schema induction is the task of learning templates with no supervision from unlabeled texts, and to group together entities corresponding to the same role in a template. Most of the previous work on this subject relies on the MUC-4 corpus. We describe the limits of using this corpus (size, non-representativeness, similarity of roles across templates) and propose a new, partially-annotated corpus in English which remedies some of these shortcomings. We make use of Wikinews to select the data inside the category Laws & Justice, and query Google search engine to retrieve different documents on the same events. Only Wikinews documents are manually annotated and can be used for evaluation, while the others can be used for unsupervised learning. We detail the methodology used for building the corpus and evaluate some existing systems on this new data.</abstract>
<identifier type="citekey">nguyen-etal-2016-dataset</identifier>
<location>
<url>https://aclanthology.org/L16-1307</url>
</location>
<part>
<date>2016-05</date>
<extent unit="page">
<start>1939</start>
<end>1943</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Dataset for Open Event Extraction in English
%A Nguyen, Kiem-Hieu
%A Tannier, Xavier
%A Ferret, Olivier
%A Besançon, Romaric
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Declerck, Thierry
%Y Goggi, Sara
%Y Grobelnik, Marko
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Mazo, Helene
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC’16)
%D 2016
%8 May
%I European Language Resources Association (ELRA)
%C Portorož, Slovenia
%F nguyen-etal-2016-dataset
%X This article presents a corpus for development and testing of event schema induction systems in English. Schema induction is the task of learning templates with no supervision from unlabeled texts, and to group together entities corresponding to the same role in a template. Most of the previous work on this subject relies on the MUC-4 corpus. We describe the limits of using this corpus (size, non-representativeness, similarity of roles across templates) and propose a new, partially-annotated corpus in English which remedies some of these shortcomings. We make use of Wikinews to select the data inside the category Laws & Justice, and query Google search engine to retrieve different documents on the same events. Only Wikinews documents are manually annotated and can be used for evaluation, while the others can be used for unsupervised learning. We detail the methodology used for building the corpus and evaluate some existing systems on this new data.
%U https://aclanthology.org/L16-1307
%P 1939-1943
Markdown (Informal)
[A Dataset for Open Event Extraction in English](https://aclanthology.org/L16-1307) (Nguyen et al., LREC 2016)
ACL
- Kiem-Hieu Nguyen, Xavier Tannier, Olivier Ferret, and Romaric Besançon. 2016. A Dataset for Open Event Extraction in English. In Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC'16), pages 1939–1943, Portorož, Slovenia. European Language Resources Association (ELRA).