@inproceedings{du-cardie-2018-harvesting,
title = "Harvesting Paragraph-level Question-Answer Pairs from {W}ikipedia",
author = "Du, Xinya and
Cardie, Claire",
editor = "Gurevych, Iryna and
Miyao, Yusuke",
booktitle = "Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = jul,
year = "2018",
address = "Melbourne, Australia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/P18-1177",
doi = "10.18653/v1/P18-1177",
pages = "1907--1917",
abstract = "We study the task of generating from Wikipedia articles question-answer pairs that cover content beyond a single sentence. We propose a neural network approach that incorporates coreference knowledge via a novel gating mechanism. As compared to models that only take into account sentence-level information (Heilman and Smith, 2010; Du et al., 2017; Zhou et al., 2017), we find that the linguistic knowledge introduced by the coreference representation aids question generation significantly, producing models that outperform the current state-of-the-art. We apply our system (composed of an answer span extraction system and the passage-level QG system) to the 10,000 top ranking Wikipedia articles and create a corpus of over one million question-answer pairs. We provide qualitative analysis for the this large-scale generated corpus from Wikipedia.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="du-cardie-2018-harvesting">
<titleInfo>
<title>Harvesting Paragraph-level Question-Answer Pairs from Wikipedia</title>
</titleInfo>
<name type="personal">
<namePart type="given">Xinya</namePart>
<namePart type="family">Du</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Claire</namePart>
<namePart type="family">Cardie</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Iryna</namePart>
<namePart type="family">Gurevych</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yusuke</namePart>
<namePart type="family">Miyao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Melbourne, Australia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We study the task of generating from Wikipedia articles question-answer pairs that cover content beyond a single sentence. We propose a neural network approach that incorporates coreference knowledge via a novel gating mechanism. As compared to models that only take into account sentence-level information (Heilman and Smith, 2010; Du et al., 2017; Zhou et al., 2017), we find that the linguistic knowledge introduced by the coreference representation aids question generation significantly, producing models that outperform the current state-of-the-art. We apply our system (composed of an answer span extraction system and the passage-level QG system) to the 10,000 top ranking Wikipedia articles and create a corpus of over one million question-answer pairs. We provide qualitative analysis for the this large-scale generated corpus from Wikipedia.</abstract>
<identifier type="citekey">du-cardie-2018-harvesting</identifier>
<identifier type="doi">10.18653/v1/P18-1177</identifier>
<location>
<url>https://aclanthology.org/P18-1177</url>
</location>
<part>
<date>2018-07</date>
<extent unit="page">
<start>1907</start>
<end>1917</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Harvesting Paragraph-level Question-Answer Pairs from Wikipedia
%A Du, Xinya
%A Cardie, Claire
%Y Gurevych, Iryna
%Y Miyao, Yusuke
%S Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2018
%8 July
%I Association for Computational Linguistics
%C Melbourne, Australia
%F du-cardie-2018-harvesting
%X We study the task of generating from Wikipedia articles question-answer pairs that cover content beyond a single sentence. We propose a neural network approach that incorporates coreference knowledge via a novel gating mechanism. As compared to models that only take into account sentence-level information (Heilman and Smith, 2010; Du et al., 2017; Zhou et al., 2017), we find that the linguistic knowledge introduced by the coreference representation aids question generation significantly, producing models that outperform the current state-of-the-art. We apply our system (composed of an answer span extraction system and the passage-level QG system) to the 10,000 top ranking Wikipedia articles and create a corpus of over one million question-answer pairs. We provide qualitative analysis for the this large-scale generated corpus from Wikipedia.
%R 10.18653/v1/P18-1177
%U https://aclanthology.org/P18-1177
%U https://doi.org/10.18653/v1/P18-1177
%P 1907-1917
Markdown (Informal)
[Harvesting Paragraph-level Question-Answer Pairs from Wikipedia](https://aclanthology.org/P18-1177) (Du & Cardie, ACL 2018)
ACL