@inproceedings{albalak-etal-2023-addressing,
title = "Addressing Issues of Cross-Linguality in Open-Retrieval Question Answering Systems For Emergent Domains",
author = "Albalak, Alon and
Levy, Sharon and
Wang, William Yang",
editor = "Croce, Danilo and
Soldaini, Luca",
booktitle = "Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics: System Demonstrations",
month = may,
year = "2023",
address = "Dubrovnik, Croatia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.eacl-demo.1",
doi = "10.18653/v1/2023.eacl-demo.1",
pages = "1--10",
abstract = "Open-retrieval question answering systems are generally trained and tested on large datasets in well-established domains. However, low-resource settings such as new and emerging domains would especially benefit from reliable question answering systems. Furthermore, multilingual and cross-lingual resources in emergent domains are scarce, leading to few or no such systems. In this paper, we demonstrate a cross-lingual open-retrieval question answering system for the emergent domain of COVID-19.Our system adopts a corpus of scientific articles to ensure that retrieved documents are reliable. To address the scarcity of cross-lingual training data in emergent domains, we present a method utilizing automatic translation, alignment, and filtering to produce English-to-all datasets. We show that a deep semantic retriever greatly benefits from training on our English-to-all data and significantly outperforms a BM25 baseline in the cross-lingual setting. We illustrate the capabilities of our system with examples and release all code necessary to train and deploy such a system.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="albalak-etal-2023-addressing">
<titleInfo>
<title>Addressing Issues of Cross-Linguality in Open-Retrieval Question Answering Systems For Emergent Domains</title>
</titleInfo>
<name type="personal">
<namePart type="given">Alon</namePart>
<namePart type="family">Albalak</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sharon</namePart>
<namePart type="family">Levy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">William</namePart>
<namePart type="given">Yang</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics: System Demonstrations</title>
</titleInfo>
<name type="personal">
<namePart type="given">Danilo</namePart>
<namePart type="family">Croce</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luca</namePart>
<namePart type="family">Soldaini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dubrovnik, Croatia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Open-retrieval question answering systems are generally trained and tested on large datasets in well-established domains. However, low-resource settings such as new and emerging domains would especially benefit from reliable question answering systems. Furthermore, multilingual and cross-lingual resources in emergent domains are scarce, leading to few or no such systems. In this paper, we demonstrate a cross-lingual open-retrieval question answering system for the emergent domain of COVID-19.Our system adopts a corpus of scientific articles to ensure that retrieved documents are reliable. To address the scarcity of cross-lingual training data in emergent domains, we present a method utilizing automatic translation, alignment, and filtering to produce English-to-all datasets. We show that a deep semantic retriever greatly benefits from training on our English-to-all data and significantly outperforms a BM25 baseline in the cross-lingual setting. We illustrate the capabilities of our system with examples and release all code necessary to train and deploy such a system.</abstract>
<identifier type="citekey">albalak-etal-2023-addressing</identifier>
<identifier type="doi">10.18653/v1/2023.eacl-demo.1</identifier>
<location>
<url>https://aclanthology.org/2023.eacl-demo.1</url>
</location>
<part>
<date>2023-05</date>
<extent unit="page">
<start>1</start>
<end>10</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Addressing Issues of Cross-Linguality in Open-Retrieval Question Answering Systems For Emergent Domains
%A Albalak, Alon
%A Levy, Sharon
%A Wang, William Yang
%Y Croce, Danilo
%Y Soldaini, Luca
%S Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics: System Demonstrations
%D 2023
%8 May
%I Association for Computational Linguistics
%C Dubrovnik, Croatia
%F albalak-etal-2023-addressing
%X Open-retrieval question answering systems are generally trained and tested on large datasets in well-established domains. However, low-resource settings such as new and emerging domains would especially benefit from reliable question answering systems. Furthermore, multilingual and cross-lingual resources in emergent domains are scarce, leading to few or no such systems. In this paper, we demonstrate a cross-lingual open-retrieval question answering system for the emergent domain of COVID-19.Our system adopts a corpus of scientific articles to ensure that retrieved documents are reliable. To address the scarcity of cross-lingual training data in emergent domains, we present a method utilizing automatic translation, alignment, and filtering to produce English-to-all datasets. We show that a deep semantic retriever greatly benefits from training on our English-to-all data and significantly outperforms a BM25 baseline in the cross-lingual setting. We illustrate the capabilities of our system with examples and release all code necessary to train and deploy such a system.
%R 10.18653/v1/2023.eacl-demo.1
%U https://aclanthology.org/2023.eacl-demo.1
%U https://doi.org/10.18653/v1/2023.eacl-demo.1
%P 1-10
Markdown (Informal)
[Addressing Issues of Cross-Linguality in Open-Retrieval Question Answering Systems For Emergent Domains](https://aclanthology.org/2023.eacl-demo.1) (Albalak et al., EACL 2023)
ACL