@inproceedings{gella-keller-2017-analysis,
title = "An Analysis of Action Recognition Datasets for Language and Vision Tasks",
author = "Gella, Spandana and
Keller, Frank",
editor = "Barzilay, Regina and
Kan, Min-Yen",
booktitle = "Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)",
month = jul,
year = "2017",
address = "Vancouver, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/P17-2011",
doi = "10.18653/v1/P17-2011",
pages = "64--71",
abstract = "A large amount of recent research has focused on tasks that combine language and vision, resulting in a proliferation of datasets and methods. One such task is action recognition, whose applications include image annotation, scene understanding and image retrieval. In this survey, we categorize the existing approaches based on how they conceptualize this problem and provide a detailed review of existing datasets, highlighting their diversity as well as advantages and disadvantages. We focus on recently developed datasets which link visual information with linguistic resources and provide a fine-grained syntactic and semantic analysis of actions in images.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="gella-keller-2017-analysis">
<titleInfo>
<title>An Analysis of Action Recognition Datasets for Language and Vision Tasks</title>
</titleInfo>
<name type="personal">
<namePart type="given">Spandana</namePart>
<namePart type="family">Gella</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Frank</namePart>
<namePart type="family">Keller</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Regina</namePart>
<namePart type="family">Barzilay</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vancouver, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>A large amount of recent research has focused on tasks that combine language and vision, resulting in a proliferation of datasets and methods. One such task is action recognition, whose applications include image annotation, scene understanding and image retrieval. In this survey, we categorize the existing approaches based on how they conceptualize this problem and provide a detailed review of existing datasets, highlighting their diversity as well as advantages and disadvantages. We focus on recently developed datasets which link visual information with linguistic resources and provide a fine-grained syntactic and semantic analysis of actions in images.</abstract>
<identifier type="citekey">gella-keller-2017-analysis</identifier>
<identifier type="doi">10.18653/v1/P17-2011</identifier>
<location>
<url>https://aclanthology.org/P17-2011</url>
</location>
<part>
<date>2017-07</date>
<extent unit="page">
<start>64</start>
<end>71</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T An Analysis of Action Recognition Datasets for Language and Vision Tasks
%A Gella, Spandana
%A Keller, Frank
%Y Barzilay, Regina
%Y Kan, Min-Yen
%S Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)
%D 2017
%8 July
%I Association for Computational Linguistics
%C Vancouver, Canada
%F gella-keller-2017-analysis
%X A large amount of recent research has focused on tasks that combine language and vision, resulting in a proliferation of datasets and methods. One such task is action recognition, whose applications include image annotation, scene understanding and image retrieval. In this survey, we categorize the existing approaches based on how they conceptualize this problem and provide a detailed review of existing datasets, highlighting their diversity as well as advantages and disadvantages. We focus on recently developed datasets which link visual information with linguistic resources and provide a fine-grained syntactic and semantic analysis of actions in images.
%R 10.18653/v1/P17-2011
%U https://aclanthology.org/P17-2011
%U https://doi.org/10.18653/v1/P17-2011
%P 64-71
Markdown (Informal)
[An Analysis of Action Recognition Datasets for Language and Vision Tasks](https://aclanthology.org/P17-2011) (Gella & Keller, ACL 2017)
ACL