% STAIR (Chen et al.), EMNLP 2023 main conference paper; ACL Anthology ID 2023.emnlp-main.932.
@inproceedings{chen-etal-2023-stair,
  author    = {Chen, Chen and
               Zhang, Bowen and
               Cao, Liangliang and
               Shen, Jiguang and
               Gunter, Tom and
               Jose, Albin and
               Toshev, Alexander and
               Zheng, Yantao and
               Shlens, Jonathon and
               Pang, Ruoming and
               Yang, Yinfei},
  title     = {{STAIR}: Learning Sparse Text and Image Representation in Grounded Tokens},
  editor    = {Bouamor, Houda and
               Pino, Juan and
               Bali, Kalika},
  booktitle = {Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing},
  publisher = {Association for Computational Linguistics},
  address   = {Singapore},
  month     = dec,
  year      = {2023},
  pages     = {15079--15094},
  doi       = {10.18653/v1/2023.emnlp-main.932},
  url       = {https://aclanthology.org/2023.emnlp-main.932/},
}