@inproceedings{nan-etal-2024-omg,
title = "{OMG}-{QA}: Building Open-Domain Multi-Modal Generative Question Answering Systems",
author = "Nan, Linyong and
Fang, Weining and
Rasteh, Aylin and
Lahabi, Pouya and
Zou, Weijin and
Zhao, Yilun and
Cohan, Arman",
editor = "Dernoncourt, Franck and
Preo{\c{t}}iuc-Pietro, Daniel and
Shimorina, Anastasia",
booktitle = "Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing: Industry Track",
month = nov,
year = "2024",
address = "Miami, Florida, US",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.emnlp-industry.75",
doi = "10.18653/v1/2024.emnlp-industry.75",
pages = "1001--1015",
abstract = "We introduce OMG-QA, a new resource for question answering that is designed to evaluate the effectiveness of question answering systems that perform retrieval augmented generation (RAG) in scenarios that demand reasoning on multi-modal, multi-document contexts. These systems, given a user query, must retrieve relevant contexts from the web, which may include non-textual information, and then reason and synthesize these contents to generate a detailed, coherent answer. Unlike existing open-domain QA datasets, OMG-QA requires systems to navigate and integrate diverse modalities and a broad pool of information sources, making it uniquely challenging. We conduct a thorough evaluation and analysis of a diverse set of QA systems, featuring various retrieval frameworks, document retrievers, document indexing approaches, evidence retrieval methods, and LLMs tasked with both information retrieval and generation. Our findings reveal significant limitations in existing approaches using RAG or LLM agents to address open questions that require long-form answers supported by multi-modal evidence. We believe that OMG-QA will be a valuable resource for developing QA systems that are better equipped to handle open-domain, multi-modal information-seeking tasks.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="nan-etal-2024-omg">
<titleInfo>
<title>OMG-QA: Building Open-Domain Multi-Modal Generative Question Answering Systems</title>
</titleInfo>
<name type="personal">
<namePart type="given">Linyong</namePart>
<namePart type="family">Nan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Weining</namePart>
<namePart type="family">Fang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aylin</namePart>
<namePart type="family">Rasteh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pouya</namePart>
<namePart type="family">Lahabi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Weijin</namePart>
<namePart type="family">Zou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yilun</namePart>
<namePart type="family">Zhao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arman</namePart>
<namePart type="family">Cohan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing: Industry Track</title>
</titleInfo>
<name type="personal">
<namePart type="given">Franck</namePart>
<namePart type="family">Dernoncourt</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Preoţiuc-Pietro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anastasia</namePart>
<namePart type="family">Shimorina</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, US</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We introduce OMG-QA, a new resource for question answering that is designed to evaluate the effectiveness of question answering systems that perform retrieval augmented generation (RAG) in scenarios that demand reasoning on multi-modal, multi-document contexts. These systems, given a user query, must retrieve relevant contexts from the web, which may include non-textual information, and then reason and synthesize these contents to generate a detailed, coherent answer. Unlike existing open-domain QA datasets, OMG-QA requires systems to navigate and integrate diverse modalities and a broad pool of information sources, making it uniquely challenging. We conduct a thorough evaluation and analysis of a diverse set of QA systems, featuring various retrieval frameworks, document retrievers, document indexing approaches, evidence retrieval methods, and LLMs tasked with both information retrieval and generation. Our findings reveal significant limitations in existing approaches using RAG or LLM agents to address open questions that require long-form answers supported by multi-modal evidence. We believe that OMG-QA will be a valuable resource for developing QA systems that are better equipped to handle open-domain, multi-modal information-seeking tasks.</abstract>
<identifier type="citekey">nan-etal-2024-omg</identifier>
<identifier type="doi">10.18653/v1/2024.emnlp-industry.75</identifier>
<location>
<url>https://aclanthology.org/2024.emnlp-industry.75</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>1001</start>
<end>1015</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T OMG-QA: Building Open-Domain Multi-Modal Generative Question Answering Systems
%A Nan, Linyong
%A Fang, Weining
%A Rasteh, Aylin
%A Lahabi, Pouya
%A Zou, Weijin
%A Zhao, Yilun
%A Cohan, Arman
%Y Dernoncourt, Franck
%Y Preoţiuc-Pietro, Daniel
%Y Shimorina, Anastasia
%S Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing: Industry Track
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, US
%F nan-etal-2024-omg
%X We introduce OMG-QA, a new resource for question answering that is designed to evaluate the effectiveness of question answering systems that perform retrieval augmented generation (RAG) in scenarios that demand reasoning on multi-modal, multi-document contexts. These systems, given a user query, must retrieve relevant contexts from the web, which may include non-textual information, and then reason and synthesize these contents to generate a detailed, coherent answer. Unlike existing open-domain QA datasets, OMG-QA requires systems to navigate and integrate diverse modalities and a broad pool of information sources, making it uniquely challenging. We conduct a thorough evaluation and analysis of a diverse set of QA systems, featuring various retrieval frameworks, document retrievers, document indexing approaches, evidence retrieval methods, and LLMs tasked with both information retrieval and generation. Our findings reveal significant limitations in existing approaches using RAG or LLM agents to address open questions that require long-form answers supported by multi-modal evidence. We believe that OMG-QA will be a valuable resource for developing QA systems that are better equipped to handle open-domain, multi-modal information-seeking tasks.
%R 10.18653/v1/2024.emnlp-industry.75
%U https://aclanthology.org/2024.emnlp-industry.75
%U https://doi.org/10.18653/v1/2024.emnlp-industry.75
%P 1001-1015
Markdown (Informal)
[OMG-QA: Building Open-Domain Multi-Modal Generative Question Answering Systems](https://aclanthology.org/2024.emnlp-industry.75) (Nan et al., EMNLP 2024)
ACL
- Linyong Nan, Weining Fang, Aylin Rasteh, Pouya Lahabi, Weijin Zou, Yilun Zhao, and Arman Cohan. 2024. OMG-QA: Building Open-Domain Multi-Modal Generative Question Answering Systems. In Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing: Industry Track, pages 1001–1015, Miami, Florida, US. Association for Computational Linguistics.