% Workshop paper in the MIA 2022 proceedings (pages 91-99); values are brace-delimited.
@inproceedings{agarwal-etal-2022-zero,
    title     = {Zero-shot cross-lingual open domain question answering},
    author    = {Agarwal, Sumit and
                 Tripathi, Suraj and
                 Mitamura, Teruko and
                 Rose, Carolyn Penstein},
    editor    = {Asai, Akari and
                 Choi, Eunsol and
                 Clark, Jonathan H. and
                 Hu, Junjie and
                 Lee, Chia-Hsuan and
                 Kasai, Jungo and
                 Longpre, Shayne and
                 Yamada, Ikuya and
                 Zhang, Rui},
    booktitle = {Proceedings of the Workshop on Multilingual Information Access (MIA)},
    month     = jul,
    year      = {2022},
    address   = {Seattle, USA},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2022.mia-1.9},
    doi       = {10.18653/v1/2022.mia-1.9},
    pages     = {91--99},
    abstract  = {People speaking different kinds of languages search for information in a cross-lingual manner. They tend to ask questions in their language and expect the answer to be in the same language, despite the evidence lying in another language. In this paper, we present our approach for this task of cross-lingual open-domain question-answering. Our proposed method employs a passage reranker, the fusion-in-decoder technique for generation, and a wiki data entity-based post-processing system to tackle the inability to generate entities across all languages. Our end-2-end pipeline shows an improvement of 3 and 4.6 points on F1 and EM metrics respectively, when compared with the baseline CORA model on the XOR-TyDi dataset. We also evaluate the effectiveness of our proposed techniques in the zero-shot setting using the MKQA dataset and show an improvement of 5 points in F1 for high-resource and 3 points improvement for low-resource zero-shot languages. Our team, CMUmQA{'}s submission in the MIA-Shared task ranked 1st in the constrained setup for the dev and 2nd in the test setting.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record, citekey agarwal-etal-2022-zero. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="agarwal-etal-2022-zero">
    <titleInfo>
      <title>Zero-shot cross-lingual open domain question answering</title>
    </titleInfo>
    <!-- Authors of the paper, in listed order. -->
    <name type="personal">
      <namePart type="given">Sumit</namePart>
      <namePart type="family">Agarwal</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Suraj</namePart>
      <namePart type="family">Tripathi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Teruko</namePart>
      <namePart type="family">Mitamura</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Carolyn</namePart>
      <namePart type="given">Penstein</namePart>
      <namePart type="family">Rose</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2022-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Workshop on Multilingual Information Access (MIA)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Akari</namePart>
        <namePart type="family">Asai</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Eunsol</namePart>
        <namePart type="family">Choi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jonathan</namePart>
        <namePart type="given">H</namePart>
        <namePart type="family">Clark</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Junjie</namePart>
        <namePart type="family">Hu</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Chia-Hsuan</namePart>
        <namePart type="family">Lee</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jungo</namePart>
        <namePart type="family">Kasai</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Shayne</namePart>
        <namePart type="family">Longpre</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ikuya</namePart>
        <namePart type="family">Yamada</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Rui</namePart>
        <namePart type="family">Zhang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Seattle, USA</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>People speaking different kinds of languages search for information in a cross-lingual manner. They tend to ask questions in their language and expect the answer to be in the same language, despite the evidence lying in another language. In this paper, we present our approach for this task of cross-lingual open-domain question-answering. Our proposed method employs a passage reranker, the fusion-in-decoder technique for generation, and a wiki data entity-based post-processing system to tackle the inability to generate entities across all languages. Our end-2-end pipeline shows an improvement of 3 and 4.6 points on F1 and EM metrics respectively, when compared with the baseline CORA model on the XOR-TyDi dataset. We also evaluate the effectiveness of our proposed techniques in the zero-shot setting using the MKQA dataset and show an improvement of 5 points in F1 for high-resource and 3 points improvement for low-resource zero-shot languages. Our team, CMUmQA’s submission in the MIA-Shared task ranked 1st in the constrained setup for the dev and 2nd in the test setting.</abstract>
    <!-- Identifiers and canonical location of the record. -->
    <identifier type="citekey">agarwal-etal-2022-zero</identifier>
    <identifier type="doi">10.18653/v1/2022.mia-1.9</identifier>
    <location>
      <url>https://aclanthology.org/2022.mia-1.9</url>
    </location>
    <!-- Issue date and page extent of the paper within the host volume. -->
    <part>
      <date>2022-07</date>
      <extent unit="page">
        <start>91</start>
        <end>99</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Zero-shot cross-lingual open domain question answering
%A Agarwal, Sumit
%A Tripathi, Suraj
%A Mitamura, Teruko
%A Rose, Carolyn Penstein
%Y Asai, Akari
%Y Choi, Eunsol
%Y Clark, Jonathan H.
%Y Hu, Junjie
%Y Lee, Chia-Hsuan
%Y Kasai, Jungo
%Y Longpre, Shayne
%Y Yamada, Ikuya
%Y Zhang, Rui
%S Proceedings of the Workshop on Multilingual Information Access (MIA)
%D 2022
%8 July
%I Association for Computational Linguistics
%C Seattle, USA
%F agarwal-etal-2022-zero
%X People speaking different kinds of languages search for information in a cross-lingual manner. They tend to ask questions in their language and expect the answer to be in the same language, despite the evidence lying in another language. In this paper, we present our approach for this task of cross-lingual open-domain question-answering. Our proposed method employs a passage reranker, the fusion-in-decoder technique for generation, and a wiki data entity-based post-processing system to tackle the inability to generate entities across all languages. Our end-2-end pipeline shows an improvement of 3 and 4.6 points on F1 and EM metrics respectively, when compared with the baseline CORA model on the XOR-TyDi dataset. We also evaluate the effectiveness of our proposed techniques in the zero-shot setting using the MKQA dataset and show an improvement of 5 points in F1 for high-resource and 3 points improvement for low-resource zero-shot languages. Our team, CMUmQA’s submission in the MIA-Shared task ranked 1st in the constrained setup for the dev and 2nd in the test setting.
%R 10.18653/v1/2022.mia-1.9
%U https://aclanthology.org/2022.mia-1.9
%U https://doi.org/10.18653/v1/2022.mia-1.9
%P 91-99
Markdown (Informal)
[Zero-shot cross-lingual open domain question answering](https://aclanthology.org/2022.mia-1.9) (Agarwal et al., MIA 2022)
ACL
- Sumit Agarwal, Suraj Tripathi, Teruko Mitamura, and Carolyn Penstein Rose. 2022. Zero-shot cross-lingual open domain question answering. In Proceedings of the Workshop on Multilingual Information Access (MIA), pages 91–99, Seattle, USA. Association for Computational Linguistics.