@inproceedings{kostic-etal-2021-multi,
title = "Multi-modal Retrieval of Tables and Texts Using Tri-encoder Models",
author = {Kosti{\'c}, Bogdan and
Risch, Julian and
M{\"o}ller, Timo},
editor = "Fisch, Adam and
Talmor, Alon and
Chen, Danqi and
Choi, Eunsol and
Seo, Minjoon and
Lewis, Patrick and
Jia, Robin and
Min, Sewon",
booktitle = "Proceedings of the 3rd Workshop on Machine Reading for Question Answering",
month = nov,
year = "2021",
address = "Punta Cana, Dominican Republic",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.mrqa-1.8/",
doi = "10.18653/v1/2021.mrqa-1.8",
pages = "82--91",
abstract = "Open-domain extractive question answering works well on textual data by first retrieving candidate texts and then extracting the answer from those candidates. However, some questions cannot be answered by text alone but require information stored in tables. In this paper, we present an approach for retrieving both texts and tables relevant to a question by jointly encoding texts, tables and questions into a single vector space. To this end, we create a new multi-modal dataset based on text and table datasets from related work and compare the retrieval performance of different encoding schemata. We find that dense vector embeddings of transformer models outperform sparse embeddings on four out of six evaluation datasets. Comparing different dense embedding models, tri-encoders with one encoder for each question, text and table increase retrieval performance compared to bi-encoders with one encoder for the question and one for both text and tables. We release the newly created multi-modal dataset to the community so that it can be used for training and evaluation."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kostic-etal-2021-multi">
<titleInfo>
<title>Multi-modal Retrieval of Tables and Texts Using Tri-encoder Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bogdan</namePart>
<namePart type="family">Kostić</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Julian</namePart>
<namePart type="family">Risch</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Timo</namePart>
<namePart type="family">Möller</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 3rd Workshop on Machine Reading for Question Answering</title>
</titleInfo>
<name type="personal">
<namePart type="given">Adam</namePart>
<namePart type="family">Fisch</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alon</namePart>
<namePart type="family">Talmor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Danqi</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eunsol</namePart>
<namePart type="family">Choi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Minjoon</namePart>
<namePart type="family">Seo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Patrick</namePart>
<namePart type="family">Lewis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Robin</namePart>
<namePart type="family">Jia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sewon</namePart>
<namePart type="family">Min</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Punta Cana, Dominican Republic</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Open-domain extractive question answering works well on textual data by first retrieving candidate texts and then extracting the answer from those candidates. However, some questions cannot be answered by text alone but require information stored in tables. In this paper, we present an approach for retrieving both texts and tables relevant to a question by jointly encoding texts, tables and questions into a single vector space. To this end, we create a new multi-modal dataset based on text and table datasets from related work and compare the retrieval performance of different encoding schemata. We find that dense vector embeddings of transformer models outperform sparse embeddings on four out of six evaluation datasets. Comparing different dense embedding models, tri-encoders with one encoder for each question, text and table increase retrieval performance compared to bi-encoders with one encoder for the question and one for both text and tables. We release the newly created multi-modal dataset to the community so that it can be used for training and evaluation.</abstract>
<identifier type="citekey">kostic-etal-2021-multi</identifier>
<identifier type="doi">10.18653/v1/2021.mrqa-1.8</identifier>
<location>
<url>https://aclanthology.org/2021.mrqa-1.8/</url>
</location>
<part>
<date>2021-11</date>
<extent unit="page">
<start>82</start>
<end>91</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Multi-modal Retrieval of Tables and Texts Using Tri-encoder Models
%A Kostić, Bogdan
%A Risch, Julian
%A Möller, Timo
%Y Fisch, Adam
%Y Talmor, Alon
%Y Chen, Danqi
%Y Choi, Eunsol
%Y Seo, Minjoon
%Y Lewis, Patrick
%Y Jia, Robin
%Y Min, Sewon
%S Proceedings of the 3rd Workshop on Machine Reading for Question Answering
%D 2021
%8 November
%I Association for Computational Linguistics
%C Punta Cana, Dominican Republic
%F kostic-etal-2021-multi
%X Open-domain extractive question answering works well on textual data by first retrieving candidate texts and then extracting the answer from those candidates. However, some questions cannot be answered by text alone but require information stored in tables. In this paper, we present an approach for retrieving both texts and tables relevant to a question by jointly encoding texts, tables and questions into a single vector space. To this end, we create a new multi-modal dataset based on text and table datasets from related work and compare the retrieval performance of different encoding schemata. We find that dense vector embeddings of transformer models outperform sparse embeddings on four out of six evaluation datasets. Comparing different dense embedding models, tri-encoders with one encoder for each question, text and table increase retrieval performance compared to bi-encoders with one encoder for the question and one for both text and tables. We release the newly created multi-modal dataset to the community so that it can be used for training and evaluation.
%R 10.18653/v1/2021.mrqa-1.8
%U https://aclanthology.org/2021.mrqa-1.8/
%U https://doi.org/10.18653/v1/2021.mrqa-1.8
%P 82-91
Markdown (Informal)
[Multi-modal Retrieval of Tables and Texts Using Tri-encoder Models](https://aclanthology.org/2021.mrqa-1.8/) (Kostić et al., MRQA 2021)
ACL