@inproceedings{clancy-etal-2019-scalable,
title = "Scalable Knowledge Graph Construction from Text Collections",
author = "Clancy, Ryan and
Ilyas, Ihab F. and
Lin, Jimmy",
editor = "Thorne, James and
Vlachos, Andreas and
Cocarascu, Oana and
Christodoulopoulos, Christos and
Mittal, Arpit",
booktitle = "Proceedings of the Second Workshop on Fact Extraction and VERification (FEVER)",
month = nov,
year = "2019",
address = "Hong Kong, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/D19-6607",
doi = "10.18653/v1/D19-6607",
pages = "39--46",
abstract = "We present a scalable, open-source platform that {``}distills{''} a potentially large text collection into a knowledge graph. Our platform takes documents stored in Apache Solr and scales out the Stanford CoreNLP toolkit via Apache Spark integration to extract mentions and relations that are then ingested into the Neo4j graph database. The raw knowledge graph is then enriched with facts extracted from an external knowledge graph. The complete product can be manipulated by various applications using Neo4j{'}s native Cypher query language: We present a subgraph-matching approach to align extracted relations with external facts and show that fact verification, locating textual support for asserted facts, detecting inconsistent and missing facts, and extracting distantly-supervised training data can all be performed within the same framework.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="clancy-etal-2019-scalable">
<titleInfo>
<title>Scalable Knowledge Graph Construction from Text Collections</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ryan</namePart>
<namePart type="family">Clancy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ihab</namePart>
<namePart type="given">F</namePart>
<namePart type="family">Ilyas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jimmy</namePart>
<namePart type="family">Lin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Second Workshop on Fact Extraction and VERification (FEVER)</title>
</titleInfo>
<name type="personal">
<namePart type="given">James</namePart>
<namePart type="family">Thorne</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andreas</namePart>
<namePart type="family">Vlachos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Oana</namePart>
<namePart type="family">Cocarascu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christos</namePart>
<namePart type="family">Christodoulopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arpit</namePart>
<namePart type="family">Mittal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Hong Kong, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present a scalable, open-source platform that “distills” a potentially large text collection into a knowledge graph. Our platform takes documents stored in Apache Solr and scales out the Stanford CoreNLP toolkit via Apache Spark integration to extract mentions and relations that are then ingested into the Neo4j graph database. The raw knowledge graph is then enriched with facts extracted from an external knowledge graph. The complete product can be manipulated by various applications using Neo4j’s native Cypher query language: We present a subgraph-matching approach to align extracted relations with external facts and show that fact verification, locating textual support for asserted facts, detecting inconsistent and missing facts, and extracting distantly-supervised training data can all be performed within the same framework.</abstract>
<identifier type="citekey">clancy-etal-2019-scalable</identifier>
<identifier type="doi">10.18653/v1/D19-6607</identifier>
<location>
<url>https://aclanthology.org/D19-6607</url>
</location>
<part>
<date>2019-11</date>
<extent unit="page">
<start>39</start>
<end>46</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Scalable Knowledge Graph Construction from Text Collections
%A Clancy, Ryan
%A Ilyas, Ihab F.
%A Lin, Jimmy
%Y Thorne, James
%Y Vlachos, Andreas
%Y Cocarascu, Oana
%Y Christodoulopoulos, Christos
%Y Mittal, Arpit
%S Proceedings of the Second Workshop on Fact Extraction and VERification (FEVER)
%D 2019
%8 November
%I Association for Computational Linguistics
%C Hong Kong, China
%F clancy-etal-2019-scalable
%X We present a scalable, open-source platform that “distills” a potentially large text collection into a knowledge graph. Our platform takes documents stored in Apache Solr and scales out the Stanford CoreNLP toolkit via Apache Spark integration to extract mentions and relations that are then ingested into the Neo4j graph database. The raw knowledge graph is then enriched with facts extracted from an external knowledge graph. The complete product can be manipulated by various applications using Neo4j’s native Cypher query language: We present a subgraph-matching approach to align extracted relations with external facts and show that fact verification, locating textual support for asserted facts, detecting inconsistent and missing facts, and extracting distantly-supervised training data can all be performed within the same framework.
%R 10.18653/v1/D19-6607
%U https://aclanthology.org/D19-6607
%U https://doi.org/10.18653/v1/D19-6607
%P 39-46
Markdown (Informal)
[Scalable Knowledge Graph Construction from Text Collections](https://aclanthology.org/D19-6607) (Clancy et al., 2019)
ACL