% AV-TranSpeech: conference paper in the ACL 2023 proceedings (Volume 1: Long Papers).
% The system name in the title is braced as one whole word so that styles which
% recase titles keep its capitalisation without splitting the word into fragments.
@inproceedings{huang-etal-2023-av,
  title     = {{AV-TranSpeech}: Audio-Visual Robust Speech-to-Speech Translation},
  author    = {Huang, Rongjie and
               Liu, Huadai and
               Cheng, Xize and
               Ren, Yi and
               Li, Linjun and
               Ye, Zhenhui and
               He, Jinzheng and
               Zhang, Lichao and
               Liu, Jinglin and
               Yin, Xiang and
               Zhao, Zhou},
  editor    = {Rogers, Anna and
               Boyd-Graber, Jordan and
               Okazaki, Naoaki},
  booktitle = {Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = jul,
  year      = {2023},
  address   = {Toronto, Canada},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2023.acl-long.479/},
  doi       = {10.18653/v1/2023.acl-long.479},
  pages     = {8590--8604},
}