@article{singhania-etal-2022-predicting,
title = "Predicting Document Coverage for Relation Extraction",
author = "Singhania, Sneha and
Razniewski, Simon and
Weikum, Gerhard",
editor = "Roark, Brian and
Nenkova, Ani",
journal = "Transactions of the Association for Computational Linguistics",
volume = "10",
year = "2022",
address = "Cambridge, MA",
publisher = "MIT Press",
url = "https://aclanthology.org/2022.tacl-1.12",
doi = "10.1162/tacl_a_00456",
pages = "207--223",
abstract = "This paper presents a new task of predicting the coverage of a text document for relation extraction (RE): Does the document contain many relational tuples for a given entity? Coverage predictions are useful in selecting the best documents for knowledge base construction with large input corpora. To study this problem, we present a dataset of 31,366 diverse documents for 520 entities. We analyze the correlation of document coverage with features like length, entity mention frequency, Alexa rank, language complexity, and information retrieval scores. Each of these features has only moderate predictive power. We employ methods combining features with statistical models like TF-IDF and language models like BERT. The model combining features and BERT, HERB, achieves an F1 score of up to 46{\%}. We demonstrate the utility of coverage predictions on two use cases: KB construction and claim refutation.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="singhania-etal-2022-predicting">
<titleInfo>
<title>Predicting Document Coverage for Relation Extraction</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sneha</namePart>
<namePart type="family">Singhania</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Simon</namePart>
<namePart type="family">Razniewski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gerhard</namePart>
<namePart type="family">Weikum</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<genre authority="bibutilsgt">journal article</genre>
<relatedItem type="host">
<titleInfo>
<title>Transactions of the Association for Computational Linguistics</title>
</titleInfo>
<originInfo>
<issuance>continuing</issuance>
<publisher>MIT Press</publisher>
<place>
<placeTerm type="text">Cambridge, MA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">periodical</genre>
<genre authority="bibutilsgt">academic journal</genre>
</relatedItem>
<abstract>This paper presents a new task of predicting the coverage of a text document for relation extraction (RE): Does the document contain many relational tuples for a given entity? Coverage predictions are useful in selecting the best documents for knowledge base construction with large input corpora. To study this problem, we present a dataset of 31,366 diverse documents for 520 entities. We analyze the correlation of document coverage with features like length, entity mention frequency, Alexa rank, language complexity, and information retrieval scores. Each of these features has only moderate predictive power. We employ methods combining features with statistical models like TF-IDF and language models like BERT. The model combining features and BERT, HERB, achieves an F1 score of up to 46%. We demonstrate the utility of coverage predictions on two use cases: KB construction and claim refutation.</abstract>
<identifier type="citekey">singhania-etal-2022-predicting</identifier>
<identifier type="doi">10.1162/tacl_a_00456</identifier>
<location>
<url>https://aclanthology.org/2022.tacl-1.12</url>
</location>
<part>
<date>2022</date>
<detail type="volume"><number>10</number></detail>
<extent unit="page">
<start>207</start>
<end>223</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Journal Article
%T Predicting Document Coverage for Relation Extraction
%A Singhania, Sneha
%A Razniewski, Simon
%A Weikum, Gerhard
%J Transactions of the Association for Computational Linguistics
%D 2022
%V 10
%I MIT Press
%C Cambridge, MA
%F singhania-etal-2022-predicting
%X This paper presents a new task of predicting the coverage of a text document for relation extraction (RE): Does the document contain many relational tuples for a given entity? Coverage predictions are useful in selecting the best documents for knowledge base construction with large input corpora. To study this problem, we present a dataset of 31,366 diverse documents for 520 entities. We analyze the correlation of document coverage with features like length, entity mention frequency, Alexa rank, language complexity, and information retrieval scores. Each of these features has only moderate predictive power. We employ methods combining features with statistical models like TF-IDF and language models like BERT. The model combining features and BERT, HERB, achieves an F1 score of up to 46%. We demonstrate the utility of coverage predictions on two use cases: KB construction and claim refutation.
%R 10.1162/tacl_a_00456
%U https://aclanthology.org/2022.tacl-1.12
%U https://doi.org/10.1162/tacl_a_00456
%P 207-223
Markdown (Informal)
[Predicting Document Coverage for Relation Extraction](https://aclanthology.org/2022.tacl-1.12) (Singhania et al., TACL 2022)
ACL