BibTeX
@inproceedings{acharya-etal-2019-vqd,
title = "{VQD}: Visual Query Detection In Natural Scenes",
author = "Acharya, Manoj and
Jariwala, Karan and
Kanan, Christopher",
editor = "Burstein, Jill and
Doran, Christy and
Solorio, Thamar",
booktitle = "Proceedings of the 2019 Conference of the North {A}merican Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)",
month = jun,
year = "2019",
address = "Minneapolis, Minnesota",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/N19-1194",
doi = "10.18653/v1/N19-1194",
pages = "1955--1961",
abstract = "We propose a new visual grounding task called Visual Query Detection (VQD). In VQD, the task is to localize a \textit{variable} number of objects in an image where the objects are specified in natural language. VQD is related to visual referring expression comprehension, where the task is to localize only \textit{one} object. We propose the first algorithms for VQD, and we evaluate them on both visual referring expression datasets and our new VQDv1 dataset.",
}
MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="acharya-etal-2019-vqd">
    <titleInfo>
      <title>VQD: Visual Query Detection In Natural Scenes</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Manoj</namePart>
      <namePart type="family">Acharya</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Karan</namePart>
      <namePart type="family">Jariwala</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Christopher</namePart>
      <namePart type="family">Kanan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2019-06</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Jill</namePart>
        <namePart type="family">Burstein</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Christy</namePart>
        <namePart type="family">Doran</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Thamar</namePart>
        <namePart type="family">Solorio</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Minneapolis, Minnesota</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>We propose a new visual grounding task called Visual Query Detection (VQD). In VQD, the task is to localize a variable number of objects in an image where the objects are specified in natural language. VQD is related to visual referring expression comprehension, where the task is to localize only one object. We propose the first algorithms for VQD, and we evaluate them on both visual referring expression datasets and our new VQDv1 dataset.</abstract>
    <identifier type="citekey">acharya-etal-2019-vqd</identifier>
    <identifier type="doi">10.18653/v1/N19-1194</identifier>
    <location>
      <url>https://aclanthology.org/N19-1194</url>
    </location>
    <part>
      <date>2019-06</date>
      <extent unit="page">
        <start>1955</start>
        <end>1961</end>
      </extent>
    </part>
  </mods>
</modsCollection>
Endnote
%0 Conference Proceedings
%T VQD: Visual Query Detection In Natural Scenes
%A Acharya, Manoj
%A Jariwala, Karan
%A Kanan, Christopher
%Y Burstein, Jill
%Y Doran, Christy
%Y Solorio, Thamar
%S Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)
%D 2019
%8 June
%I Association for Computational Linguistics
%C Minneapolis, Minnesota
%F acharya-etal-2019-vqd
%X We propose a new visual grounding task called Visual Query Detection (VQD). In VQD, the task is to localize a variable number of objects in an image where the objects are specified in natural language. VQD is related to visual referring expression comprehension, where the task is to localize only one object. We propose the first algorithms for VQD, and we evaluate them on both visual referring expression datasets and our new VQDv1 dataset.
%R 10.18653/v1/N19-1194
%U https://aclanthology.org/N19-1194
%U https://doi.org/10.18653/v1/N19-1194
%P 1955-1961
Markdown (Informal)
[VQD: Visual Query Detection In Natural Scenes](https://aclanthology.org/N19-1194) (Acharya et al., NAACL 2019)
ACL
Manoj Acharya, Karan Jariwala, and Christopher Kanan. 2019. VQD: Visual Query Detection In Natural Scenes. In Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers), pages 1955–1961, Minneapolis, Minnesota. Association for Computational Linguistics.