@inproceedings{jin-etal-2020-genwiki,
title = "{G}en{W}iki: A Dataset of 1.3 Million Content-Sharing Text and Graphs for Unsupervised Graph-to-Text Generation",
author = "Jin, Zhijing and
Guo, Qipeng and
Qiu, Xipeng and
Zhang, Zheng",
editor = "Scott, Donia and
Bel, Nuria and
Zong, Chengqing",
booktitle = "Proceedings of the 28th International Conference on Computational Linguistics",
month = dec,
year = "2020",
address = "Barcelona, Spain (Online)",
publisher = "International Committee on Computational Linguistics",
url = "https://aclanthology.org/2020.coling-main.217",
doi = "10.18653/v1/2020.coling-main.217",
pages = "2398--2409",
abstract = "Data collection for the knowledge graph-to-text generation is expensive. As a result, research on unsupervised models has emerged as an active field recently. However, most unsupervised models have to use non-parallel versions of existing small supervised datasets, which largely constrain their potential. In this paper, we propose a large-scale, general-domain dataset, GenWiki. Our unsupervised dataset has 1.3M text and graph examples, respectively. With a human-annotated test set, we provide this new benchmark dataset for future research on unsupervised text generation from knowledge graphs.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="jin-etal-2020-genwiki">
<titleInfo>
<title>GenWiki: A Dataset of 1.3 Million Content-Sharing Text and Graphs for Unsupervised Graph-to-Text Generation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Zhijing</namePart>
<namePart type="family">Jin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Qipeng</namePart>
<namePart type="family">Guo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xipeng</namePart>
<namePart type="family">Qiu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zheng</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 28th International Conference on Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Donia</namePart>
<namePart type="family">Scott</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nuria</namePart>
<namePart type="family">Bel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chengqing</namePart>
<namePart type="family">Zong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>International Committee on Computational Linguistics</publisher>
<place>
<placeTerm type="text">Barcelona, Spain (Online)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Data collection for the knowledge graph-to-text generation is expensive. As a result, research on unsupervised models has emerged as an active field recently. However, most unsupervised models have to use non-parallel versions of existing small supervised datasets, which largely constrain their potential. In this paper, we propose a large-scale, general-domain dataset, GenWiki. Our unsupervised dataset has 1.3M text and graph examples, respectively. With a human-annotated test set, we provide this new benchmark dataset for future research on unsupervised text generation from knowledge graphs.</abstract>
<identifier type="citekey">jin-etal-2020-genwiki</identifier>
<identifier type="doi">10.18653/v1/2020.coling-main.217</identifier>
<location>
<url>https://aclanthology.org/2020.coling-main.217</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>2398</start>
<end>2409</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T GenWiki: A Dataset of 1.3 Million Content-Sharing Text and Graphs for Unsupervised Graph-to-Text Generation
%A Jin, Zhijing
%A Guo, Qipeng
%A Qiu, Xipeng
%A Zhang, Zheng
%Y Scott, Donia
%Y Bel, Nuria
%Y Zong, Chengqing
%S Proceedings of the 28th International Conference on Computational Linguistics
%D 2020
%8 December
%I International Committee on Computational Linguistics
%C Barcelona, Spain (Online)
%F jin-etal-2020-genwiki
%X Data collection for the knowledge graph-to-text generation is expensive. As a result, research on unsupervised models has emerged as an active field recently. However, most unsupervised models have to use non-parallel versions of existing small supervised datasets, which largely constrain their potential. In this paper, we propose a large-scale, general-domain dataset, GenWiki. Our unsupervised dataset has 1.3M text and graph examples, respectively. With a human-annotated test set, we provide this new benchmark dataset for future research on unsupervised text generation from knowledge graphs.
%R 10.18653/v1/2020.coling-main.217
%U https://aclanthology.org/2020.coling-main.217
%U https://doi.org/10.18653/v1/2020.coling-main.217
%P 2398-2409
Markdown (Informal)
[GenWiki: A Dataset of 1.3 Million Content-Sharing Text and Graphs for Unsupervised Graph-to-Text Generation](https://aclanthology.org/2020.coling-main.217) (Jin et al., COLING 2020)
ACL