@inproceedings{amidei-etal-2018-evaluation,
    title = "Evaluation methodologies in Automatic Question Generation 2013-2018",
    author = "Amidei, Jacopo and
      Piwek, Paul and
      Willis, Alistair",
    editor = "Krahmer, Emiel and
      Gatt, Albert and
      Goudbeek, Martijn",
    booktitle = "Proceedings of the 11th International Conference on Natural Language Generation",
    month = nov,
    year = "2018",
    address = "Tilburg University, The Netherlands",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/W18-6537",
    doi = "10.18653/v1/W18-6537",
    pages = "307--317",
    abstract = "In the last few years Automatic Question Generation (AQG) has attracted increasing interest. In this paper we survey the evaluation methodologies used in AQG. Based on a sample of 37 papers, our research shows that the systems{'} development has not been accompanied by similar developments in the methodologies used for the systems{'} evaluation. Indeed, in the papers we examine here, we find a wide variety of both intrinsic and extrinsic evaluation methodologies. Such diverse evaluation practices make it difficult to reliably compare the quality of different generation systems. Our study suggests that, given the rapidly increasing level of research in the area, a common framework is urgently needed to compare the performance of AQG systems and NLG systems more generally.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="amidei-etal-2018-evaluation">
    <titleInfo>
      <title>Evaluation methodologies in Automatic Question Generation 2013-2018</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Jacopo</namePart>
      <namePart type="family">Amidei</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Paul</namePart>
      <namePart type="family">Piwek</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Alistair</namePart>
      <namePart type="family">Willis</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2018-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 11th International Conference on Natural Language Generation</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Emiel</namePart>
        <namePart type="family">Krahmer</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Albert</namePart>
        <namePart type="family">Gatt</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Martijn</namePart>
        <namePart type="family">Goudbeek</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Tilburg University, The Netherlands</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>In the last few years Automatic Question Generation (AQG) has attracted increasing interest. In this paper we survey the evaluation methodologies used in AQG. Based on a sample of 37 papers, our research shows that the systems’ development has not been accompanied by similar developments in the methodologies used for the systems’ evaluation. Indeed, in the papers we examine here, we find a wide variety of both intrinsic and extrinsic evaluation methodologies. Such diverse evaluation practices make it difficult to reliably compare the quality of different generation systems. Our study suggests that, given the rapidly increasing level of research in the area, a common framework is urgently needed to compare the performance of AQG systems and NLG systems more generally.</abstract>
    <identifier type="citekey">amidei-etal-2018-evaluation</identifier>
    <identifier type="doi">10.18653/v1/W18-6537</identifier>
    <location>
      <url>https://aclanthology.org/W18-6537</url>
    </location>
    <part>
      <date>2018-11</date>
      <extent unit="page">
        <start>307</start>
        <end>317</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Evaluation methodologies in Automatic Question Generation 2013-2018
%A Amidei, Jacopo
%A Piwek, Paul
%A Willis, Alistair
%Y Krahmer, Emiel
%Y Gatt, Albert
%Y Goudbeek, Martijn
%S Proceedings of the 11th International Conference on Natural Language Generation
%D 2018
%8 November
%I Association for Computational Linguistics
%C Tilburg University, The Netherlands
%F amidei-etal-2018-evaluation
%X In the last few years Automatic Question Generation (AQG) has attracted increasing interest. In this paper we survey the evaluation methodologies used in AQG. Based on a sample of 37 papers, our research shows that the systems’ development has not been accompanied by similar developments in the methodologies used for the systems’ evaluation. Indeed, in the papers we examine here, we find a wide variety of both intrinsic and extrinsic evaluation methodologies. Such diverse evaluation practices make it difficult to reliably compare the quality of different generation systems. Our study suggests that, given the rapidly increasing level of research in the area, a common framework is urgently needed to compare the performance of AQG systems and NLG systems more generally.
%R 10.18653/v1/W18-6537
%U https://aclanthology.org/W18-6537
%U https://doi.org/10.18653/v1/W18-6537
%P 307-317
Markdown (Informal)
[Evaluation methodologies in Automatic Question Generation 2013-2018](https://aclanthology.org/W18-6537) (Amidei et al., INLG 2018)
ACL
Jacopo Amidei, Paul Piwek, and Alistair Willis. 2018. Evaluation methodologies in Automatic Question Generation 2013-2018. In Proceedings of the 11th International Conference on Natural Language Generation, pages 307–317, Tilburg University, The Netherlands. Association for Computational Linguistics.