@inproceedings{amouyal-etal-2023-qampari,
title = "{QAMPARI}: A Benchmark for Open-domain Questions with Many Answers",
author = "Amouyal, Samuel and
Wolfson, Tomer and
Rubin, Ohad and
Yoran, Ori and
Herzig, Jonathan and
Berant, Jonathan",
editor = "Gehrmann, Sebastian and
Wang, Alex and
Sedoc, Jo{\~a}o and
Clark, Elizabeth and
Dhole, Kaustubh and
Chandu, Khyathi Raghavi and
Santus, Enrico and
Sedghamiz, Hooman",
booktitle = "Proceedings of the Third Workshop on Natural Language Generation, Evaluation, and Metrics (GEM)",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.gem-1.9",
pages = "97--110",
abstract = "Existing benchmarks for open-domain question answering (ODQA) typically focus on questions whose answers are all in a single paragraph. By contrast, many natural questions, such as {``}What players were drafted by the Brooklyn Nets?{''} have a long list of answers extracted from multiple paragraphs. Answering such questions requires retrieving and reading many passages from a large corpus. We introduce QAMPARI, an ODQA benchmark, where answers are lists of entities, spread across many paragraphs. We created QAMPARI by (a) generating questions with multiple answers from Wikipedia{'}s knowledge graph and tables, (b) automatically pairing answers with supporting evidence in Wikipedia paragraphs, and (c) manually paraphrasing questions and validating each answer. Across a wide range of ODQA models, we find that QAMPARI is challenging in terms of both passage retrieval and answer generation, with models reaching an F1 score of 32.8 at best. We view QAMPARI as a valuable resource for ODQA research, which will aid to develop models that handle a broad range of question types, including single and multi-answer questions.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="amouyal-etal-2023-qampari">
<titleInfo>
<title>QAMPARI: A Benchmark for Open-domain Questions with Many Answers</title>
</titleInfo>
<name type="personal">
<namePart type="given">Samuel</namePart>
<namePart type="family">Amouyal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tomer</namePart>
<namePart type="family">Wolfson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ohad</namePart>
<namePart type="family">Rubin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ori</namePart>
<namePart type="family">Yoran</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jonathan</namePart>
<namePart type="family">Herzig</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jonathan</namePart>
<namePart type="family">Berant</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Third Workshop on Natural Language Generation, Evaluation, and Metrics (GEM)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sebastian</namePart>
<namePart type="family">Gehrmann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alex</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">João</namePart>
<namePart type="family">Sedoc</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elizabeth</namePart>
<namePart type="family">Clark</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kaustubh</namePart>
<namePart type="family">Dhole</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khyathi</namePart>
<namePart type="given">Raghavi</namePart>
<namePart type="family">Chandu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Enrico</namePart>
<namePart type="family">Santus</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hooman</namePart>
<namePart type="family">Sedghamiz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
    <abstract>Existing benchmarks for open-domain question answering (ODQA) typically focus on questions whose answers are all in a single paragraph. By contrast, many natural questions, such as “What players were drafted by the Brooklyn Nets?”, have a long list of answers extracted from multiple paragraphs. Answering such questions requires retrieving and reading many passages from a large corpus. We introduce QAMPARI, an ODQA benchmark, where answers are lists of entities, spread across many paragraphs. We created QAMPARI by (a) generating questions with multiple answers from Wikipedia’s knowledge graph and tables, (b) automatically pairing answers with supporting evidence in Wikipedia paragraphs, and (c) manually paraphrasing questions and validating each answer. Across a wide range of ODQA models, we find that QAMPARI is challenging in terms of both passage retrieval and answer generation, with models reaching an F1 score of 32.8 at best. We view QAMPARI as a valuable resource for ODQA research, which will aid the development of models that handle a broad range of question types, including single and multi-answer questions.</abstract>
<identifier type="citekey">amouyal-etal-2023-qampari</identifier>
<location>
<url>https://aclanthology.org/2023.gem-1.9</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>97</start>
<end>110</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T QAMPARI: A Benchmark for Open-domain Questions with Many Answers
%A Amouyal, Samuel
%A Wolfson, Tomer
%A Rubin, Ohad
%A Yoran, Ori
%A Herzig, Jonathan
%A Berant, Jonathan
%Y Gehrmann, Sebastian
%Y Wang, Alex
%Y Sedoc, João
%Y Clark, Elizabeth
%Y Dhole, Kaustubh
%Y Chandu, Khyathi Raghavi
%Y Santus, Enrico
%Y Sedghamiz, Hooman
%S Proceedings of the Third Workshop on Natural Language Generation, Evaluation, and Metrics (GEM)
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F amouyal-etal-2023-qampari
%X Existing benchmarks for open-domain question answering (ODQA) typically focus on questions whose answers are all in a single paragraph. By contrast, many natural questions, such as “What players were drafted by the Brooklyn Nets?”, have a long list of answers extracted from multiple paragraphs. Answering such questions requires retrieving and reading many passages from a large corpus. We introduce QAMPARI, an ODQA benchmark, where answers are lists of entities, spread across many paragraphs. We created QAMPARI by (a) generating questions with multiple answers from Wikipedia’s knowledge graph and tables, (b) automatically pairing answers with supporting evidence in Wikipedia paragraphs, and (c) manually paraphrasing questions and validating each answer. Across a wide range of ODQA models, we find that QAMPARI is challenging in terms of both passage retrieval and answer generation, with models reaching an F1 score of 32.8 at best. We view QAMPARI as a valuable resource for ODQA research, which will aid the development of models that handle a broad range of question types, including single and multi-answer questions.
%U https://aclanthology.org/2023.gem-1.9
%P 97-110
Markdown (Informal)
[QAMPARI: A Benchmark for Open-domain Questions with Many Answers](https://aclanthology.org/2023.gem-1.9) (Amouyal et al., GEM-WS 2023)

ACL
Samuel Amouyal, Tomer Wolfson, Ohad Rubin, Ori Yoran, Jonathan Herzig, and Jonathan Berant. 2023. QAMPARI: A Benchmark for Open-domain Questions with Many Answers. In Proceedings of the Third Workshop on Natural Language Generation, Evaluation, and Metrics (GEM), pages 97–110, Singapore. Association for Computational Linguistics.