% Workshop paper (ACL Anthology ID W19-1605). Acronyms in booktitle are
% brace-protected as whole words so styles that recase titles keep them intact.
@inproceedings{huang-etal-2019-multi-modal,
  author    = {Huang, Haoshuo and Jain, Vihan and Mehta, Harsh and Baldridge, Jason and Ie, Eugene},
  title     = {Multi-modal Discriminative Model for Vision-and-Language Navigation},
  editor    = {Bhatia, Archna and Bisk, Yonatan and Kordjamshidi, Parisa and Thomason, Jesse},
  booktitle = {Proceedings of the Combined Workshop on Spatial Language Understanding ({SpLU}) and Grounded Communication for Robotics ({RoboNLP})},
  month     = jun,
  year      = {2019},
  address   = {Minneapolis, Minnesota},
  publisher = {Association for Computational Linguistics},
  pages     = {40--49},
  doi       = {10.18653/v1/W19-1605},
  url       = {https://aclanthology.org/W19-1605/},
}