@inproceedings{sen-etal-2022-mintaka,
title = "Mintaka: A Complex, Natural, and Multilingual Dataset for End-to-End Question Answering",
author = "Sen, Priyanka and
Aji, Alham Fikri and
Saffari, Amir",
booktitle = "Proceedings of the 29th International Conference on Computational Linguistics",
month = oct,
year = "2022",
address = "Gyeongju, Republic of Korea",
publisher = "International Committee on Computational Linguistics",
url = "https://aclanthology.org/2022.coling-1.138",
pages = "1604--1619",
abstract = "We introduce Mintaka, a complex, natural, and multilingual dataset designed for experimenting with end-to-end question-answering models. Mintaka is composed of 20,000 question-answer pairs collected in English, annotated with Wikidata entities, and translated into Arabic, French, German, Hindi, Italian, Japanese, Portuguese, and Spanish for a total of 180,000 samples. Mintaka includes 8 types of complex questions, including superlative, intersection, and multi-hop questions, which were naturally elicited from crowd workers. We run baselines over Mintaka, the best of which achieves 38{\%} hits@1 in English and 31{\%} hits@1 multilingually, showing that existing models have room for improvement. We release Mintaka at https://github.com/amazon-research/mintaka.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sen-etal-2022-mintaka">
<titleInfo>
<title>Mintaka: A Complex, Natural, and Multilingual Dataset for End-to-End Question Answering</title>
</titleInfo>
<name type="personal">
<namePart type="given">Priyanka</namePart>
<namePart type="family">Sen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alham</namePart>
<namePart type="given">Fikri</namePart>
<namePart type="family">Aji</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amir</namePart>
<namePart type="family">Saffari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-10</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 29th International Conference on Computational Linguistics</title>
</titleInfo>
<originInfo>
<publisher>International Committee on Computational Linguistics</publisher>
<place>
<placeTerm type="text">Gyeongju, Republic of Korea</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We introduce Mintaka, a complex, natural, and multilingual dataset designed for experimenting with end-to-end question-answering models. Mintaka is composed of 20,000 question-answer pairs collected in English, annotated with Wikidata entities, and translated into Arabic, French, German, Hindi, Italian, Japanese, Portuguese, and Spanish for a total of 180,000 samples. Mintaka includes 8 types of complex questions, including superlative, intersection, and multi-hop questions, which were naturally elicited from crowd workers. We run baselines over Mintaka, the best of which achieves 38% hits@1 in English and 31% hits@1 multilingually, showing that existing models have room for improvement. We release Mintaka at https://github.com/amazon-research/mintaka.</abstract>
<identifier type="citekey">sen-etal-2022-mintaka</identifier>
<location>
<url>https://aclanthology.org/2022.coling-1.138</url>
</location>
<part>
<date>2022-10</date>
<extent unit="page">
<start>1604</start>
<end>1619</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Mintaka: A Complex, Natural, and Multilingual Dataset for End-to-End Question Answering
%A Sen, Priyanka
%A Aji, Alham Fikri
%A Saffari, Amir
%S Proceedings of the 29th International Conference on Computational Linguistics
%D 2022
%8 October
%I International Committee on Computational Linguistics
%C Gyeongju, Republic of Korea
%F sen-etal-2022-mintaka
%X We introduce Mintaka, a complex, natural, and multilingual dataset designed for experimenting with end-to-end question-answering models. Mintaka is composed of 20,000 question-answer pairs collected in English, annotated with Wikidata entities, and translated into Arabic, French, German, Hindi, Italian, Japanese, Portuguese, and Spanish for a total of 180,000 samples. Mintaka includes 8 types of complex questions, including superlative, intersection, and multi-hop questions, which were naturally elicited from crowd workers. We run baselines over Mintaka, the best of which achieves 38% hits@1 in English and 31% hits@1 multilingually, showing that existing models have room for improvement. We release Mintaka at https://github.com/amazon-research/mintaka.
%U https://aclanthology.org/2022.coling-1.138
%P 1604-1619
Markdown (Informal)
[Mintaka: A Complex, Natural, and Multilingual Dataset for End-to-End Question Answering](https://aclanthology.org/2022.coling-1.138) (Sen et al., COLING 2022)
ACL