% Conference paper from the EMNLP 2024 proceedings (ACL Anthology record).
% Acronyms in the title are brace-protected so styles that apply sentence
% casing keep "RWKV" and "CLIP" in capitals.
@inproceedings{gu-etal-2024-rwkv,
  author    = {Gu, Tiancheng and Yang, Kaicheng and An, Xiang and Feng, Ziyong and Liu, Dongnan and Cai, Weidong and Deng, Jiankang},
  title     = {{RWKV}-{CLIP}: A Robust Vision-Language Representation Learner},
  editor    = {Al-Onaizan, Yaser and Bansal, Mohit and Chen, Yun-Nung},
  booktitle = {Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing},
  publisher = {Association for Computational Linguistics},
  address   = {Miami, Florida, USA},
  month     = nov,
  year      = {2024},
  pages     = {4799--4812},
  doi       = {10.18653/v1/2024.emnlp-main.276},
  url       = {https://aclanthology.org/2024.emnlp-main.276/},
}