% Conference paper (INLG 2018), ACL Anthology ID W18-6516.
@inproceedings{belz-etal-2018-spatialvoc2k,
    title     = {{SpatialVOC2K}: A Multilingual Dataset of Images with Annotations and Features for Spatial Relations between Objects},
    author    = {Belz, Anja and
                 Muscat, Adrian and
                 Anguill, Pierre and
                 Sow, Mouhamadou and
                 Vincent, Ga{\'e}tan and
                 Zinessabah, Yassine},
    editor    = {Krahmer, Emiel and
                 Gatt, Albert and
                 Goudbeek, Martijn},
    booktitle = {Proceedings of the 11th International Conference on Natural Language Generation},
    month     = nov,
    year      = {2018},
    address   = {Tilburg University, The Netherlands},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/W18-6516},
    doi       = {10.18653/v1/W18-6516},
    pages     = {140--145},
    abstract  = {We present SpatialVOC2K, the first multilingual image dataset with spatial relation annotations and object features for image-to-text generation, built using 2,026 images from the PASCAL VOC2008 dataset. The dataset incorporates (i) the labelled object bounding boxes from VOC2008, (ii) geometrical, language and depth features for each object, and (iii) for each pair of objects in both orders, (a) the single best preposition and (b) the set of possible prepositions in the given language that describe the spatial relationship between the two objects. Compared to previous versions of the dataset, we have roughly doubled the size for French, and completely reannotated as well as increased the size of the English portion, providing single best prepositions for English for the first time. Furthermore, we have added explicit 3D depth features for objects. We are releasing our dataset for free reuse, along with evaluation tools to enable comparative evaluation.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record, ID belz-etal-2018-spatialvoc2k. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="belz-etal-2018-spatialvoc2k">
    <titleInfo>
      <title>SpatialVOC2K: A Multilingual Dataset of Images with Annotations and Features for Spatial Relations between Objects</title>
    </titleInfo>
    <!-- Personal names carrying the "author" role. -->
    <name type="personal">
      <namePart type="given">Anja</namePart>
      <namePart type="family">Belz</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Adrian</namePart>
      <namePart type="family">Muscat</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Pierre</namePart>
      <namePart type="family">Anguill</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Mouhamadou</namePart>
      <namePart type="family">Sow</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Gaétan</namePart>
      <namePart type="family">Vincent</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yassine</namePart>
      <namePart type="family">Zinessabah</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2018-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors and publisher details. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 11th International Conference on Natural Language Generation</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Emiel</namePart>
        <namePart type="family">Krahmer</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Albert</namePart>
        <namePart type="family">Gatt</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Martijn</namePart>
        <namePart type="family">Goudbeek</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Tilburg University, The Netherlands</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>We present SpatialVOC2K, the first multilingual image dataset with spatial relation annotations and object features for image-to-text generation, built using 2,026 images from the PASCAL VOC2008 dataset. The dataset incorporates (i) the labelled object bounding boxes from VOC2008, (ii) geometrical, language and depth features for each object, and (iii) for each pair of objects in both orders, (a) the single best preposition and (b) the set of possible prepositions in the given language that describe the spatial relationship between the two objects. Compared to previous versions of the dataset, we have roughly doubled the size for French, and completely reannotated as well as increased the size of the English portion, providing single best prepositions for English for the first time. Furthermore, we have added explicit 3D depth features for objects. We are releasing our dataset for free reuse, along with evaluation tools to enable comparative evaluation.</abstract>
    <!-- Identifiers and access location. -->
    <identifier type="citekey">belz-etal-2018-spatialvoc2k</identifier>
    <identifier type="doi">10.18653/v1/W18-6516</identifier>
    <location>
      <url>https://aclanthology.org/W18-6516</url>
    </location>
    <!-- Issue date and page extent of this part. -->
    <part>
      <date>2018-11</date>
      <extent unit="page">
        <start>140</start>
        <end>145</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T SpatialVOC2K: A Multilingual Dataset of Images with Annotations and Features for Spatial Relations between Objects
%A Belz, Anja
%A Muscat, Adrian
%A Anguill, Pierre
%A Sow, Mouhamadou
%A Vincent, Gaétan
%A Zinessabah, Yassine
%Y Krahmer, Emiel
%Y Gatt, Albert
%Y Goudbeek, Martijn
%S Proceedings of the 11th International Conference on Natural Language Generation
%D 2018
%8 November
%I Association for Computational Linguistics
%C Tilburg University, The Netherlands
%F belz-etal-2018-spatialvoc2k
%X We present SpatialVOC2K, the first multilingual image dataset with spatial relation annotations and object features for image-to-text generation, built using 2,026 images from the PASCAL VOC2008 dataset. The dataset incorporates (i) the labelled object bounding boxes from VOC2008, (ii) geometrical, language and depth features for each object, and (iii) for each pair of objects in both orders, (a) the single best preposition and (b) the set of possible prepositions in the given language that describe the spatial relationship between the two objects. Compared to previous versions of the dataset, we have roughly doubled the size for French, and completely reannotated as well as increased the size of the English portion, providing single best prepositions for English for the first time. Furthermore, we have added explicit 3D depth features for objects. We are releasing our dataset for free reuse, along with evaluation tools to enable comparative evaluation.
%R 10.18653/v1/W18-6516
%U https://aclanthology.org/W18-6516
%U https://doi.org/10.18653/v1/W18-6516
%P 140-145
Markdown (Informal)
[SpatialVOC2K: A Multilingual Dataset of Images with Annotations and Features for Spatial Relations between Objects](https://aclanthology.org/W18-6516) (Belz et al., INLG 2018)
ACL