BibTeX
@inproceedings{parida-etal-2023-havqa,
title = "{H}a{VQA}: A Dataset for Visual Question Answering and Multimodal Research in {H}ausa Language",
author = "Parida, Shantipriya and
Abdulmumin, Idris and
Muhammad, Shamsuddeen Hassan and
Bose, Aneesh and
Kohli, Guneet Singh and
Ahmad, Ibrahim Said and
Kotwal, Ketan and
Deb Sarkar, Sayan and
Bojar, Ond{\v{r}}ej and
Kakudi, Habeebah",
editor = "Rogers, Anna and
Boyd-Graber, Jordan and
Okazaki, Naoaki",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2023",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.findings-acl.646/",
doi = "10.18653/v1/2023.findings-acl.646",
pages = "10162--10183",
abstract = "This paper presents {\textquotedblleft}HaVQA{\textquotedblright}, the first multimodal dataset for visual question answering (VQA) tasks in the Hausa language. The dataset was created by manually translating 6,022 English question-answer pairs, which are associated with 1,555 unique images from the Visual Genome dataset. As a result, the dataset provides 12,044 gold standard English-Hausa parallel sentences that were translated in a fashion that guarantees their semantic match with the corresponding visual information. We conducted several baseline experiments on the dataset, including visual question answering, visual question elicitation, text-only and multimodal machine translation."
}
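If you need individual fields from this BibTeX record programmatically, the sketch below is a minimal, standard-library-only extractor. It assumes a single entry whose values are double-quoted, as above; it is not a general BibTeX parser (@string macros, string concatenation, and quotes inside values are out of scope), and the filename havqa.bib is hypothetical.

import re

def parse_bibtex_entry(text: str) -> dict:
    """Tiny field extractor for one BibTeX entry with double-quoted values."""
    entry = {}
    header = re.search(r"@(\w+)\{([^,]+),", text)
    if header:
        entry["entry_type"], entry["citekey"] = header.group(1), header.group(2)
    # field = "value" pairs; DOTALL lets multi-line values (author, abstract) match
    for key, value in re.findall(r'(\w+)\s*=\s*"(.*?)"\s*[,}]', text, re.DOTALL):
        entry[key.lower()] = re.sub(r"\s+", " ", value)
    # bare (unquoted) values such as `month = jul`
    for key, value in re.findall(r"(\w+)\s*=\s*([A-Za-z]\w*)\s*,", text):
        entry.setdefault(key.lower(), value)
    return entry

fields = parse_bibtex_entry(open("havqa.bib").read())  # hypothetical filename
print(fields["title"])
print(fields["pages"])  # 10162--10183
authors = [a.strip() for a in fields["author"].split(" and ")]
print(len(authors))     # 10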
MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="parida-etal-2023-havqa">
    <titleInfo>
      <title>HaVQA: A Dataset for Visual Question Answering and Multimodal Research in Hausa Language</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Shantipriya</namePart>
      <namePart type="family">Parida</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Idris</namePart>
      <namePart type="family">Abdulmumin</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Shamsuddeen</namePart>
      <namePart type="given">Hassan</namePart>
      <namePart type="family">Muhammad</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Aneesh</namePart>
      <namePart type="family">Bose</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Guneet</namePart>
      <namePart type="given">Singh</namePart>
      <namePart type="family">Kohli</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ibrahim</namePart>
      <namePart type="given">Said</namePart>
      <namePart type="family">Ahmad</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ketan</namePart>
      <namePart type="family">Kotwal</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sayan</namePart>
      <namePart type="family">Deb Sarkar</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ondřej</namePart>
      <namePart type="family">Bojar</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Habeebah</namePart>
      <namePart type="family">Kakudi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2023-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: ACL 2023</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Anna</namePart>
        <namePart type="family">Rogers</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jordan</namePart>
        <namePart type="family">Boyd-Graber</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Naoaki</namePart>
        <namePart type="family">Okazaki</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Toronto, Canada</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>This paper presents “HaVQA”, the first multimodal dataset for visual question answering (VQA) tasks in the Hausa language. The dataset was created by manually translating 6,022 English question-answer pairs, which are associated with 1,555 unique images from the Visual Genome dataset. As a result, the dataset provides 12,044 gold standard English-Hausa parallel sentences that were translated in a fashion that guarantees their semantic match with the corresponding visual information. We conducted several baseline experiments on the dataset, including visual question answering, visual question elicitation, text-only and multimodal machine translation.</abstract>
    <identifier type="citekey">parida-etal-2023-havqa</identifier>
    <identifier type="doi">10.18653/v1/2023.findings-acl.646</identifier>
    <location>
      <url>https://aclanthology.org/2023.findings-acl.646/</url>
    </location>
    <part>
      <date>2023-07</date>
      <extent unit="page">
        <start>10162</start>
        <end>10183</end>
      </extent>
    </part>
  </mods>
</modsCollection>
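The MODS record above is namespaced, so every path step needs the http://www.loc.gov/mods/v3 prefix. Below is a minimal sketch using Python's standard xml.etree.ElementTree, assuming the record is saved as the hypothetical file havqa.xml. Note that the author <name> elements are direct children of <mods>, while the editors sit under <relatedItem>, so a direct-child findall separates them for free.

import xml.etree.ElementTree as ET

NS = {"m": "http://www.loc.gov/mods/v3"}  # namespace from the modsCollection root

tree = ET.parse("havqa.xml")  # hypothetical filename holding the record above
mods = tree.getroot().find("m:mods", NS)

title = mods.find("m:titleInfo/m:title", NS).text

# Direct-child <name> elements are the authors; editors (under
# <relatedItem>) are not matched by this relative path.
authors = []
for name in mods.findall("m:name[@type='personal']", NS):
    if name.find("m:role/m:roleTerm", NS).text == "author":
        authors.append(" ".join(p.text for p in name.findall("m:namePart", NS)))

doi = mods.find("m:identifier[@type='doi']", NS).text
extent = mods.find("m:part/m:extent", NS)
start, end = extent.find("m:start", NS).text, extent.find("m:end", NS).text

print(title)
print(authors[0], "…", authors[-1])  # Shantipriya Parida … Habeebah Kakudi
print(doi, f"pp. {start}-{end}")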
Endnote
%0 Conference Proceedings
%T HaVQA: A Dataset for Visual Question Answering and Multimodal Research in Hausa Language
%A Parida, Shantipriya
%A Abdulmumin, Idris
%A Muhammad, Shamsuddeen Hassan
%A Bose, Aneesh
%A Kohli, Guneet Singh
%A Ahmad, Ibrahim Said
%A Kotwal, Ketan
%A Deb Sarkar, Sayan
%A Bojar, Ondřej
%A Kakudi, Habeebah
%Y Rogers, Anna
%Y Boyd-Graber, Jordan
%Y Okazaki, Naoaki
%S Findings of the Association for Computational Linguistics: ACL 2023
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F parida-etal-2023-havqa
%X This paper presents “HaVQA”, the first multimodal dataset for visual question answering (VQA) tasks in the Hausa language. The dataset was created by manually translating 6,022 English question-answer pairs, which are associated with 1,555 unique images from the Visual Genome dataset. As a result, the dataset provides 12,044 gold standard English-Hausa parallel sentences that were translated in a fashion that guarantees their semantic match with the corresponding visual information. We conducted several baseline experiments on the dataset, including visual question answering, visual question elicitation, text-only and multimodal machine translation.
%R 10.18653/v1/2023.findings-acl.646
%U https://aclanthology.org/2023.findings-acl.646/
%U https://doi.org/10.18653/v1/2023.findings-acl.646
%P 10162-10183
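The Endnote/Refer export is line-oriented: a % sign, a one-character tag, a space, then the value, with some tags repeatable (here %A for each author, %Y for each editor, %U for each URL). A small standard-library sketch that folds repeated tags into lists; the filename havqa.enw is hypothetical.

REPEATABLE = {"A", "Y", "U"}  # authors, editors, URLs may occur more than once

def parse_refer(text: str) -> dict:
    record = {}
    for line in text.splitlines():
        if len(line) < 2 or not line.startswith("%"):
            continue
        tag, value = line[1], line[3:].strip()
        if tag in REPEATABLE:
            record.setdefault(tag, []).append(value)
        else:
            record[tag] = value
    return record

rec = parse_refer(open("havqa.enw").read())  # hypothetical filename
print(rec["T"])       # title
print(len(rec["A"]))  # 10 authors
print(rec["R"])       # DOI: 10.18653/v1/2023.findings-acl.646
print(rec["P"])       # 10162-10183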
Markdown (Informal)
[HaVQA: A Dataset for Visual Question Answering and Multimodal Research in Hausa Language](https://aclanthology.org/2023.findings-acl.646/) (Parida et al., Findings 2023)
ACL
- Shantipriya Parida, Idris Abdulmumin, Shamsuddeen Hassan Muhammad, Aneesh Bose, Guneet Singh Kohli, Ibrahim Said Ahmad, Ketan Kotwal, Sayan Deb Sarkar, Ondřej Bojar, and Habeebah Kakudi. 2023. HaVQA: A Dataset for Visual Question Answering and Multimodal Research in Hausa Language. In Findings of the Association for Computational Linguistics: ACL 2023, pages 10162–10183, Toronto, Canada. Association for Computational Linguistics.