% AV-TranSpeech (Huang et al.), ACL 2023 long paper, Volume 1, pp. 8590--8604.
% The model name is brace-protected as one whole token so sentence-casing
% styles keep its capitalisation without splitting the word mid-letter.
@inproceedings{huang-etal-2023-av,
    title     = {{AV-TranSpeech}: Audio-Visual Robust Speech-to-Speech Translation},
    author    = {Huang, Rongjie and
                 Liu, Huadai and
                 Cheng, Xize and
                 Ren, Yi and
                 Li, Linjun and
                 Ye, Zhenhui and
                 He, Jinzheng and
                 Zhang, Lichao and
                 Liu, Jinglin and
                 Yin, Xiang and
                 Zhao, Zhou},
    editor    = {Rogers, Anna and
                 Boyd-Graber, Jordan and
                 Okazaki, Naoaki},
    booktitle = {Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
    month     = jul,
    year      = {2023},
    address   = {Toronto, Canada},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2023.acl-long.479/},
    doi       = {10.18653/v1/2023.acl-long.479},
    pages     = {8590--8604},
}