% Conference-paper record for the MIA 2022 workshop paper on Icelandic open QA.
% Names are stored in "Last, First" form with LaTeX accent escapes, the month
% uses the bare jul macro, and the DOI is kept without a resolver prefix.
% Whole words (QA, Icelandic, MIA) are brace-protected so that styles which
% re-case titles keep their capitalisation.
% NOTE(review): address holds "Seattle, USA", which looks like the workshop
% venue rather than a publisher city; confirm before relocating it.
@inproceedings{snaebjarnarson-einarsson-2022-cross,
  author    = {Sn{\ae}bjarnarson, V{\'e}steinn and
               Einarsson, Hafsteinn},
  editor    = {Asai, Akari and
               Choi, Eunsol and
               Clark, Jonathan H. and
               Hu, Junjie and
               Lee, Chia-Hsuan and
               Kasai, Jungo and
               Longpre, Shayne and
               Yamada, Ikuya and
               Zhang, Rui},
  title     = {Cross-Lingual {QA} as a Stepping Stone for Monolingual Open {QA} in {Icelandic}},
  booktitle = {Proceedings of the Workshop on Multilingual Information Access ({MIA})},
  month     = jul,
  year      = {2022},
  address   = {Seattle, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {29--36},
  doi       = {10.18653/v1/2022.mia-1.4},
  url       = {https://aclanthology.org/2022.mia-1.4},
  abstract  = {It can be challenging to build effective open question answering (open QA) systems for languages other than English, mainly due to a lack of labeled data for training. We present a data efficient method to bootstrap such a system for languages other than English. Our approach requires only limited QA resources in the given language, along with machine-translated data, and at least a bilingual language model. To evaluate our approach, we build such a system for the Icelandic language and evaluate performance over trivia style datasets. The corpora used for training are English in origin but machine translated into Icelandic. We train a bilingual Icelandic/English language model to embed English context and Icelandic questions following methodology introduced with DensePhrases (Lee et al., 2021). The resulting system is an open domain cross-lingual QA system between Icelandic and English. Finally, the system is adapted for Icelandic only open QA, demonstrating how it is possible to efficiently create an open QA system with limited access to curated datasets in the language of interest.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record; its ID matches the citekey identifier further down. -->
  <mods ID="snaebjarnarson-einarsson-2022-cross">
    <titleInfo>
      <title>Cross-Lingual QA as a Stepping Stone for Monolingual Open QA in Icelandic</title>
    </titleInfo>
    <!-- Paper authors. -->
    <name type="personal">
      <namePart type="given">Vésteinn</namePart>
      <namePart type="family">Snæbjarnarson</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Hafsteinn</namePart>
      <namePart type="family">Einarsson</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2022-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Workshop on Multilingual Information Access (MIA)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Akari</namePart>
        <namePart type="family">Asai</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Eunsol</namePart>
        <namePart type="family">Choi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jonathan</namePart>
        <namePart type="given">H</namePart>
        <namePart type="family">Clark</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Junjie</namePart>
        <namePart type="family">Hu</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Chia-Hsuan</namePart>
        <namePart type="family">Lee</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jungo</namePart>
        <namePart type="family">Kasai</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Shayne</namePart>
        <namePart type="family">Longpre</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ikuya</namePart>
        <namePart type="family">Yamada</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Rui</namePart>
        <namePart type="family">Zhang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Seattle, USA</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>It can be challenging to build effective open question answering (open QA) systems for languages other than English, mainly due to a lack of labeled data for training. We present a data efficient method to bootstrap such a system for languages other than English. Our approach requires only limited QA resources in the given language, along with machine-translated data, and at least a bilingual language model. To evaluate our approach, we build such a system for the Icelandic language and evaluate performance over trivia style datasets. The corpora used for training are English in origin but machine translated into Icelandic. We train a bilingual Icelandic/English language model to embed English context and Icelandic questions following methodology introduced with DensePhrases (Lee et al., 2021). The resulting system is an open domain cross-lingual QA system between Icelandic and English. Finally, the system is adapted for Icelandic only open QA, demonstrating how it is possible to efficiently create an open QA system with limited access to curated datasets in the language of interest.</abstract>
    <!-- Identifiers and access location for the paper. -->
    <identifier type="citekey">snaebjarnarson-einarsson-2022-cross</identifier>
    <identifier type="doi">10.18653/v1/2022.mia-1.4</identifier>
    <location>
      <url>https://aclanthology.org/2022.mia-1.4</url>
    </location>
    <!-- Date and page extent recorded for this part. -->
    <part>
      <date>2022-07</date>
      <extent unit="page">
        <start>29</start>
        <end>36</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Cross-Lingual QA as a Stepping Stone for Monolingual Open QA in Icelandic
%A Snæbjarnarson, Vésteinn
%A Einarsson, Hafsteinn
%Y Asai, Akari
%Y Choi, Eunsol
%Y Clark, Jonathan H.
%Y Hu, Junjie
%Y Lee, Chia-Hsuan
%Y Kasai, Jungo
%Y Longpre, Shayne
%Y Yamada, Ikuya
%Y Zhang, Rui
%S Proceedings of the Workshop on Multilingual Information Access (MIA)
%D 2022
%8 July
%I Association for Computational Linguistics
%C Seattle, USA
%F snaebjarnarson-einarsson-2022-cross
%X It can be challenging to build effective open question answering (open QA) systems for languages other than English, mainly due to a lack of labeled data for training. We present a data efficient method to bootstrap such a system for languages other than English. Our approach requires only limited QA resources in the given language, along with machine-translated data, and at least a bilingual language model. To evaluate our approach, we build such a system for the Icelandic language and evaluate performance over trivia style datasets. The corpora used for training are English in origin but machine translated into Icelandic. We train a bilingual Icelandic/English language model to embed English context and Icelandic questions following methodology introduced with DensePhrases (Lee et al., 2021). The resulting system is an open domain cross-lingual QA system between Icelandic and English. Finally, the system is adapted for Icelandic only open QA, demonstrating how it is possible to efficiently create an open QA system with limited access to curated datasets in the language of interest.
%R 10.18653/v1/2022.mia-1.4
%U https://aclanthology.org/2022.mia-1.4
%U https://doi.org/10.18653/v1/2022.mia-1.4
%P 29-36
Markdown (Informal)
[Cross-Lingual QA as a Stepping Stone for Monolingual Open QA in Icelandic](https://aclanthology.org/2022.mia-1.4) (Snæbjarnarson & Einarsson, MIA 2022)
ACL