@inproceedings{von-essen-hesslow-2020-building,
title = "Building a {S}wedish Question-Answering Model",
author = "von Essen, Hannes and
Hesslow, Daniel",
booktitle = "Proceedings of the Probability and Meaning Conference (PaM 2020)",
month = jun,
year = "2020",
address = "Gothenburg",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.pam-1.16",
pages = "117--127",
abstract = "High quality datasets for question answering exist in a few languages, but far from all. Producing such datasets for new languages requires extensive manual labour. In this work we look at different methods for using existing datasets to train question-answering models in languages lacking such datasets. We show that machine translation followed by cross-lingual projection is a viable way to create a full question-answering dataset in a new language. We introduce new methods both for bitext alignment, using optimal transport, and for direct cross-lingual projection, utilizing multilingual BERT. We show that our methods produce good Swedish question-answering models without any manual work. Finally, we apply our proposed methods on Spanish and evaluate it on the XQuAD and MLQA benchmarks where we achieve new state-of-the-art values of 80.4 F1 and 62.9 Exact Match (EM) points on the Spanish XQuAD corpus and 70.8 F1 and 53.0 EM on the Spanish MLQA corpus, showing that the technique is readily applicable to other languages.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="von-essen-hesslow-2020-building">
<titleInfo>
<title>Building a Swedish Question-Answering Model</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hannes</namePart>
<namePart type="family">von Essen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Hesslow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Probability and Meaning Conference (PaM 2020)</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Gothenburg</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>High quality datasets for question answering exist in a few languages, but far from all. Producing such datasets for new languages requires extensive manual labour. In this work we look at different methods for using existing datasets to train question-answering models in languages lacking such datasets. We show that machine translation followed by cross-lingual projection is a viable way to create a full question-answering dataset in a new language. We introduce new methods both for bitext alignment, using optimal transport, and for direct cross-lingual projection, utilizing multilingual BERT. We show that our methods produce good Swedish question-answering models without any manual work. Finally, we apply our proposed methods on Spanish and evaluate it on the XQuAD and MLQA benchmarks where we achieve new state-of-the-art values of 80.4 F1 and 62.9 Exact Match (EM) points on the Spanish XQuAD corpus and 70.8 F1 and 53.0 EM on the Spanish MLQA corpus, showing that the technique is readily applicable to other languages.</abstract>
<identifier type="citekey">von-essen-hesslow-2020-building</identifier>
<location>
<url>https://aclanthology.org/2020.pam-1.16</url>
</location>
<part>
<date>2020-06</date>
<extent unit="page">
<start>117</start>
<end>127</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Building a Swedish Question-Answering Model
%A von Essen, Hannes
%A Hesslow, Daniel
%S Proceedings of the Probability and Meaning Conference (PaM 2020)
%D 2020
%8 June
%I Association for Computational Linguistics
%C Gothenburg
%F von-essen-hesslow-2020-building
%X High quality datasets for question answering exist in a few languages, but far from all. Producing such datasets for new languages requires extensive manual labour. In this work we look at different methods for using existing datasets to train question-answering models in languages lacking such datasets. We show that machine translation followed by cross-lingual projection is a viable way to create a full question-answering dataset in a new language. We introduce new methods both for bitext alignment, using optimal transport, and for direct cross-lingual projection, utilizing multilingual BERT. We show that our methods produce good Swedish question-answering models without any manual work. Finally, we apply our proposed methods on Spanish and evaluate it on the XQuAD and MLQA benchmarks where we achieve new state-of-the-art values of 80.4 F1 and 62.9 Exact Match (EM) points on the Spanish XQuAD corpus and 70.8 F1 and 53.0 EM on the Spanish MLQA corpus, showing that the technique is readily applicable to other languages.
%U https://aclanthology.org/2020.pam-1.16
%P 117-127
Markdown (Informal)
[Building a Swedish Question-Answering Model](https://aclanthology.org/2020.pam-1.16) (von Essen & Hesslow, PaM 2020)
ACL
- Hannes von Essen and Daniel Hesslow. 2020. Building a Swedish Question-Answering Model. In Proceedings of the Probability and Meaning Conference (PaM 2020), pages 117–127, Gothenburg. Association for Computational Linguistics.