% AudioVSR paper in the EMNLP 2024 proceedings (record 2024.emnlp-main.858).
% The coined name in the title is brace-protected as a whole word so that
% sentence-casing styles keep its capitalisation.
@inproceedings{yang-etal-2024-audiovsr,
  author    = {Yang, Xiaoda and Cheng, Xize and Duan, Jiaqi and Qiu, Hongshun and Hong, Minjie and Fang, Minghui and Ji, Shengpeng and Zuo, Jialong and Hong, Zhiqing and Zhang, Zhimeng and Jin, Tao},
  title     = {{AudioVSR}: Enhancing Video Speech Recognition with Audio Data},
  editor    = {Al-Onaizan, Yaser and Bansal, Mohit and Chen, Yun-Nung},
  booktitle = {Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing},
  month     = nov,
  year      = {2024},
  address   = {Miami, Florida, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {15352--15361},
  doi       = {10.18653/v1/2024.emnlp-main.858},
  url       = {https://aclanthology.org/2024.emnlp-main.858/},
}