@article{TACL568,
  author   = {Lazaridou, Angeliki and Dinu, Georgiana and Liska, Adam and Baroni, Marco},
  title    = {From Visual Attributes to Adjectives through Decompositional Distributional Semantics},
  journal  = {Transactions of the Association for Computational Linguistics},
  volume   = {3},
  year     = {2015},
  abstract = {As automated image analysis progresses, there is increasing interest in
              richer linguistic annotation of pictures, with attributes of objects
              (e.g., furry, brown...) attracting most attention. By building on the
              recent ``zero-shot learning'' approach, and paying attention to the
              linguistic nature of attributes as noun modifiers, and specifically
              adjectives, we show that it is possible to tag images with
              attribute-denoting adjectives even when no training data containing the
              relevant annotation are available. Our approach relies on two key
              observations. First, objects can be seen as bundles of attributes,
              typically expressed as adjectival modifiers (a dog is something furry,
              brown, etc.), and thus a function trained to map visual representations
              of objects to nominal labels can implicitly learn to map attributes to
              adjectives. Second, objects and attributes come together in pictures
              (the same thing is a dog and it is brown). We can thus achieve better
              attribute (and object) label retrieval by treating images as ``visual
              phrases'', and decomposing their linguistic representation into an
              attribute-denoting adjective and an object-denoting noun. Our approach
              performs comparably to a method exploiting manual attribute annotation,
              it outperforms various competitive alternatives in both attribute and
              object annotation, and it automatically constructs attribute-centric
              representations that significantly improve performance in supervised
              object recognition.},
  issn     = {2307-387X},
  url      = {https://tacl2013.cs.columbia.edu/ojs/index.php/tacl/article/view/568},
  pages    = {183--196},
}
