<?xml version="1.0" encoding="UTF-8" ?>
<volume id="W16">
  <paper id="3200">
    <title>Proceedings of the 5th Workshop on Vision and Language</title>
    <editor>Anya Belz</editor>
    <editor>Erkut Erdem</editor>
    <editor>Krystian Mikolajczyk</editor>
    <editor>Katerina Pastra</editor>
    <month>August</month>
    <year>2016</year>
    <address>Berlin, Germany</address>
    <publisher>Association for Computational Linguistics</publisher>
    <url>http://anthology.aclweb.org/W16-32</url>
    <bibtype>book</bibtype>
    <bibkey>VL16:2016</bibkey>
  </paper>

  <paper id="3201">
    <title>Automatic Annotation of Structured Facts in Images</title>
    <author><first>Mohamed</first><last>Elhoseiny</last></author>
    <author><first>Scott</first><last>Cohen</last></author>
    <author><first>Walter</first><last>Chang</last></author>
    <author><first>Brian</first><last>Price</last></author>
    <author><first>Ahmed</first><last>Elgammal</last></author>
    <booktitle>Proceedings of the 5th Workshop on Vision and Language</booktitle>
    <month>August</month>
    <year>2016</year>
    <address>Berlin, Germany</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>1&#8211;9</pages>
    <url>http://anthology.aclweb.org/W16-3201</url>
    <bibtype>inproceedings</bibtype>
    <bibkey>elhoseiny-EtAl:2016:VL16</bibkey>
  </paper>

  <paper id="3202">
    <title>Combining Lexical and Spatial Knowledge to Predict Spatial Relations between Objects in Images</title>
    <author><first>Manuela</first><last>H&#252;rlimann</last></author>
    <author><first>Johan</first><last>Bos</last></author>
    <booktitle>Proceedings of the 5th Workshop on Vision and Language</booktitle>
    <month>August</month>
    <year>2016</year>
    <address>Berlin, Germany</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>10&#8211;18</pages>
    <url>http://anthology.aclweb.org/W16-3202</url>
    <bibtype>inproceedings</bibtype>
    <bibkey>hurlimann-bos:2016:VL16</bibkey>
  </paper>

  <paper id="3203">
    <title>Focused Evaluation for Image Description with Binary Forced-Choice Tasks</title>
    <author><first>Micah</first><last>Hodosh</last></author>
    <author><first>Julia</first><last>Hockenmaier</last></author>
    <booktitle>Proceedings of the 5th Workshop on Vision and Language</booktitle>
    <month>August</month>
    <year>2016</year>
    <address>Berlin, Germany</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>19&#8211;28</pages>
    <url>http://anthology.aclweb.org/W16-3203</url>
    <bibtype>inproceedings</bibtype>
    <bibkey>hodosh-hockenmaier:2016:VL16</bibkey>
  </paper>

  <paper id="3204">
    <title>Leveraging Captions in the Wild to Improve Object Detection</title>
    <author><first>Mert</first><last>Kilickaya</last></author>
    <author><first>Nazli</first><last>Ikizler-Cinbis</last></author>
    <author><first>Erkut</first><last>Erdem</last></author>
    <author><first>Aykut</first><last>Erdem</last></author>
    <booktitle>Proceedings of the 5th Workshop on Vision and Language</booktitle>
    <month>August</month>
    <year>2016</year>
    <address>Berlin, Germany</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>29&#8211;38</pages>
    <url>http://anthology.aclweb.org/W16-3204</url>
    <bibtype>inproceedings</bibtype>
    <bibkey>kilickaya-EtAl:2016:VL16</bibkey>
  </paper>

  <paper id="3205">
    <title>Natural Language Descriptions of Human Activities Scenes: Corpus Generation and Analysis</title>
    <author><first>Nouf</first><last>Alharbi</last></author>
    <author><first>Yoshihiko</first><last>Gotoh</last></author>
    <booktitle>Proceedings of the 5th Workshop on Vision and Language</booktitle>
    <month>August</month>
    <year>2016</year>
    <address>Berlin, Germany</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>39&#8211;47</pages>
    <url>http://anthology.aclweb.org/W16-3205</url>
    <bibtype>inproceedings</bibtype>
    <bibkey>alharbi-gotoh:2016:VL16</bibkey>
  </paper>

  <paper id="3206">
    <title>Interactively Learning Visually Grounded Word Meanings from a Human Tutor</title>
    <author><first>Yanchao</first><last>Yu</last></author>
    <author><first>Arash</first><last>Eshghi</last></author>
    <author><first>Oliver</first><last>Lemon</last></author>
    <booktitle>Proceedings of the 5th Workshop on Vision and Language</booktitle>
    <month>August</month>
    <year>2016</year>
    <address>Berlin, Germany</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>48&#8211;53</pages>
    <url>http://anthology.aclweb.org/W16-3206</url>
    <bibtype>inproceedings</bibtype>
    <bibkey>yu-eshghi-lemon:2016:VL16</bibkey>
  </paper>

  <paper id="3207">
    <title>Pragmatic Factors in Image Description: The Case of Negations</title>
    <author><first>Emiel</first><last>van Miltenburg</last></author>
    <author><first>Roser</first><last>Morante</last></author>
    <author><first>Desmond</first><last>Elliott</last></author>
    <booktitle>Proceedings of the 5th Workshop on Vision and Language</booktitle>
    <month>August</month>
    <year>2016</year>
    <address>Berlin, Germany</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>54&#8211;59</pages>
    <url>http://anthology.aclweb.org/W16-3207</url>
    <bibtype>inproceedings</bibtype>
    <bibkey>vanmiltenburg-morante-elliott:2016:VL16</bibkey>
  </paper>

  <paper id="3208">
    <title>Building a Bagpipe with a Bag and a Pipe: Exploring Conceptual Combination in Vision</title>
    <author><first>Sandro</first><last>Pezzelle</last></author>
    <author><first>Ravi</first><last>Shekhar</last></author>
    <author><first>Raffaella</first><last>Bernardi</last></author>
    <booktitle>Proceedings of the 5th Workshop on Vision and Language</booktitle>
    <month>August</month>
    <year>2016</year>
    <address>Berlin, Germany</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>60&#8211;64</pages>
    <url>http://anthology.aclweb.org/W16-3208</url>
    <bibtype>inproceedings</bibtype>
    <bibkey>pezzelle-shekhar-bernardi:2016:VL16</bibkey>
  </paper>

  <paper id="3209">
    <title>Exploring Different Preposition Sets, Models and Feature Sets in Automatic Generation of Spatial Image Descriptions</title>
    <author><first>Anja</first><last>Belz</last></author>
    <author><first>Adrian</first><last>Muscat</last></author>
    <author><first>Brandon</first><last>Birmingham</last></author>
    <booktitle>Proceedings of the 5th Workshop on Vision and Language</booktitle>
    <month>August</month>
    <year>2016</year>
    <address>Berlin, Germany</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>65&#8211;69</pages>
    <url>http://anthology.aclweb.org/W16-3209</url>
    <bibtype>inproceedings</bibtype>
    <bibkey>belz-muscat-birmingham:2016:VL16</bibkey>
  </paper>

  <paper id="3210">
    <title>Multi30K: Multilingual English-German Image Descriptions</title>
    <author><first>Desmond</first><last>Elliott</last></author>
    <author><first>Stella</first><last>Frank</last></author>
    <author><first>Khalil</first><last>Sima'an</last></author>
    <author><first>Lucia</first><last>Specia</last></author>
    <booktitle>Proceedings of the 5th Workshop on Vision and Language</booktitle>
    <month>August</month>
    <year>2016</year>
    <address>Berlin, Germany</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>70&#8211;74</pages>
    <url>http://anthology.aclweb.org/W16-3210</url>
    <bibtype>inproceedings</bibtype>
    <bibkey>elliott-EtAl:2016:VL16</bibkey>
  </paper>

  <paper id="3211">
    <title>&#x201c;Look, some Green Circles!&#x201d;: Learning to Quantify from Images</title>
    <author><first>Ionut</first><last>Sorodoc</last></author>
    <author><first>Angeliki</first><last>Lazaridou</last></author>
    <author><first>Gemma</first><last>Boleda</last></author>
    <author><first>Aur&#233;lie</first><last>Herbelot</last></author>
    <author><first>Sandro</first><last>Pezzelle</last></author>
    <author><first>Raffaella</first><last>Bernardi</last></author>
    <booktitle>Proceedings of the 5th Workshop on Vision and Language</booktitle>
    <month>August</month>
    <year>2016</year>
    <address>Berlin, Germany</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>75&#8211;79</pages>
    <url>http://anthology.aclweb.org/W16-3211</url>
    <bibtype>inproceedings</bibtype>
    <bibkey>sorodoc-EtAl:2016:VL16</bibkey>
  </paper>

  <paper id="3212">
    <title>Text2voronoi: An Image-driven Approach to Differential Diagnosis</title>
    <author><first>Alexander</first><last>Mehler</last></author>
    <author><first>Tolga</first><last>Uslu</last></author>
    <author><first>Wahed</first><last>Hemati</last></author>
    <booktitle>Proceedings of the 5th Workshop on Vision and Language</booktitle>
    <month>August</month>
    <year>2016</year>
    <address>Berlin, Germany</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>80&#8211;85</pages>
    <url>http://anthology.aclweb.org/W16-3212</url>
    <bibtype>inproceedings</bibtype>
    <bibkey>mehler-uslu-hemati:2016:VL16</bibkey>
  </paper>

  <paper id="3213">
    <title>Detecting Visually Relevant Sentences for Fine-Grained Classification</title>
    <author><first>Olivia</first><last>Winn</last></author>
    <author><first>Madhavan Kavanur</first><last>Kidambi</last></author>
    <author><first>Smaranda</first><last>Muresan</last></author>
    <booktitle>Proceedings of the 5th Workshop on Vision and Language</booktitle>
    <month>August</month>
    <year>2016</year>
    <address>Berlin, Germany</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>86&#8211;91</pages>
    <url>http://anthology.aclweb.org/W16-3213</url>
    <bibtype>inproceedings</bibtype>
    <bibkey>winn-kidambi-muresan:2016:VL16</bibkey>
  </paper>

</volume>

