@inproceedings{ao-etal-2022-speecht5, title = "{S}peech{T}5: Unified-Modal Encoder-Decoder Pre-Training for Spoken Language Processing", author = "Ao, Junyi and Wang, Rui and Zhou, Long and Wang, Chengyi and Ren, Shuo and Wu, Yu and Liu, Shujie and Ko, Tom and Li, Qing and Zhang, Yu and Wei, Zhihua and Qian, Yao and Li, Jinyu and Wei, Furu", editor = "Muresan, Smaranda and Nakov, Preslav and Villavicencio, Aline", booktitle = "Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)", month = may, year = "2022", address = "Dublin, Ireland", publisher = "Association for Computational Linguistics", url = "https://aclanthology.org/2022.acl-long.393/", doi = "10.18653/v1/2022.acl-long.393", pages = "5723--5738" }