@article{longpre-etal-2021-mkqa,
    title = "{MKQA}: A Linguistically Diverse Benchmark for Multilingual Open Domain Question Answering",
    author = "Longpre, Shayne and
      Lu, Yi and
      Daiber, Joachim",
    editor = "Roark, Brian and
      Nenkova, Ani",
    journal = "Transactions of the Association for Computational Linguistics",
    volume = "9",
    year = "2021",
    address = "Cambridge, MA",
    publisher = "MIT Press",
    url = "https://aclanthology.org/2021.tacl-1.82",
    doi = "10.1162/tacl_a_00433",
    pages = "1389--1406",
    abstract = "Progress in cross-lingual modeling depends on challenging, realistic, and diverse evaluation sets. We introduce Multilingual Knowledge Questions and Answers (MKQA), an open-domain question answering evaluation set comprising 10k question-answer pairs aligned across 26 typologically diverse languages (260k question-answer pairs in total). Answers are based on a heavily curated, language-independent data representation, making results comparable across languages and independent of language-specific passages. With 26 languages, this dataset supplies the widest range of languages to date for evaluating question answering. We benchmark a variety of state-of-the-art methods and baselines for generative and extractive question answering, trained on Natural Questions, in zero-shot and translation settings. Results indicate this dataset is challenging even in English, but especially in low-resource languages.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="longpre-etal-2021-mkqa">
    <titleInfo>
        <title>MKQA: A Linguistically Diverse Benchmark for Multilingual Open Domain Question Answering</title>
    </titleInfo>
    <name type="personal">
        <namePart type="given">Shayne</namePart>
        <namePart type="family">Longpre</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Yi</namePart>
        <namePart type="family">Lu</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Joachim</namePart>
        <namePart type="family">Daiber</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <originInfo>
        <dateIssued>2021</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <genre authority="bibutilsgt">journal article</genre>
    <relatedItem type="host">
        <titleInfo>
            <title>Transactions of the Association for Computational Linguistics</title>
        </titleInfo>
        <originInfo>
            <issuance>continuing</issuance>
            <publisher>MIT Press</publisher>
            <place>
                <placeTerm type="text">Cambridge, MA</placeTerm>
            </place>
        </originInfo>
        <genre authority="marcgt">periodical</genre>
        <genre authority="bibutilsgt">academic journal</genre>
    </relatedItem>
    <abstract>Progress in cross-lingual modeling depends on challenging, realistic, and diverse evaluation sets. We introduce Multilingual Knowledge Questions and Answers (MKQA), an open-domain question answering evaluation set comprising 10k question-answer pairs aligned across 26 typologically diverse languages (260k question-answer pairs in total). Answers are based on a heavily curated, language-independent data representation, making results comparable across languages and independent of language-specific passages. With 26 languages, this dataset supplies the widest range of languages to date for evaluating question answering. We benchmark a variety of state-of-the-art methods and baselines for generative and extractive question answering, trained on Natural Questions, in zero-shot and translation settings. Results indicate this dataset is challenging even in English, but especially in low-resource languages.</abstract>
    <identifier type="citekey">longpre-etal-2021-mkqa</identifier>
    <identifier type="doi">10.1162/tacl_a_00433</identifier>
    <location>
        <url>https://aclanthology.org/2021.tacl-1.82</url>
    </location>
    <part>
        <date>2021</date>
        <detail type="volume"><number>9</number></detail>
        <extent unit="page">
            <start>1389</start>
            <end>1406</end>
        </extent>
    </part>
</mods>
</modsCollection>
%0 Journal Article
%T MKQA: A Linguistically Diverse Benchmark for Multilingual Open Domain Question Answering
%A Longpre, Shayne
%A Lu, Yi
%A Daiber, Joachim
%J Transactions of the Association for Computational Linguistics
%D 2021
%V 9
%I MIT Press
%C Cambridge, MA
%F longpre-etal-2021-mkqa
%X Progress in cross-lingual modeling depends on challenging, realistic, and diverse evaluation sets. We introduce Multilingual Knowledge Questions and Answers (MKQA), an open-domain question answering evaluation set comprising 10k question-answer pairs aligned across 26 typologically diverse languages (260k question-answer pairs in total). Answers are based on a heavily curated, language-independent data representation, making results comparable across languages and independent of language-specific passages. With 26 languages, this dataset supplies the widest range of languages to date for evaluating question answering. We benchmark a variety of state-of-the-art methods and baselines for generative and extractive question answering, trained on Natural Questions, in zero-shot and translation settings. Results indicate this dataset is challenging even in English, but especially in low-resource languages.
%R 10.1162/tacl_a_00433
%U https://aclanthology.org/2021.tacl-1.82
%U https://doi.org/10.1162/tacl_a_00433
%P 1389-1406
Markdown (Informal)
[MKQA: A Linguistically Diverse Benchmark for Multilingual Open Domain Question Answering](https://aclanthology.org/2021.tacl-1.82) (Longpre et al., TACL 2021)
ACL
Shayne Longpre, Yi Lu, and Joachim Daiber. 2021. MKQA: A Linguistically Diverse Benchmark for Multilingual Open Domain Question Answering. Transactions of the Association for Computational Linguistics, 9:1389–1406.