% TACL journal article (volume 3, 2015, pp. 117--129) introducing NewsSpike-RE.
% The abstract uses ASCII LaTeX quotes and dashes so the entry stays safe
% under classic 8-bit BibTeX as well as Biber.
@article{TACL476,
  author   = {Zhang, Congle and Soderland, Stephen and Weld, Daniel},
  title    = {Exploiting Parallel News Streams for Unsupervised Event Extraction},
  journal  = {Transactions of the Association for Computational Linguistics},
  volume   = {3},
  pages    = {117--129},
  year     = {2015},
  issn     = {2307-387X},
  url      = {https://tacl2013.cs.columbia.edu/ojs/index.php/tacl/article/view/476},
  abstract = {Most approaches to relation extraction, the task of extracting
              ground facts from natural language text, are based on machine
              learning and thus starved by scarce training data. Manual
              annotation is too expensive to scale to a comprehensive set of
              relations. Distant supervision, which automatically creates
              training data, only works with relations that already populate a
              knowledge base (KB). Unfortunately, KBs such as FreeBase rarely
              cover event relations (e.g. ``person travels to location''). Thus,
              the problem of extracting a wide range of events --- e.g., from
              news streams --- is an important, open challenge. This paper
              introduces NewsSpike-RE, a novel, unsupervised algorithm that
              discovers event relations and then learns to extract them.
              NewsSpike-RE uses a novel probabilistic graphical model to cluster
              sentences describing similar events from parallel news streams.
              These clusters then comprise training data for the extractor. Our
              evaluation shows that NewsSpike-RE generates high quality training
              sentences and learns extractors that perform much better than
              rival approaches, more than doubling the area under a
              precision-recall curve compared to Universal Schemas.},
}
