% Conference paper, Findings of EMNLP 2021 (ACL Anthology ID 2021.findings-emnlp.304).
@inproceedings{kudugunta-etal-2021-beyond-distillation,
  author    = {Kudugunta, Sneha and Huang, Yanping and Bapna, Ankur and Krikun, Maxim and Lepikhin, Dmitry and Luong, Minh-Thang and Firat, Orhan},
  title     = {Beyond Distillation: Task-level Mixture-of-Experts for Efficient Inference},
  editor    = {Moens, Marie-Francine and Huang, Xuanjing and Specia, Lucia and Yih, Scott Wen-tau},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2021},
  year      = {2021},
  month     = nov,
  address   = {Punta Cana, Dominican Republic},
  publisher = {Association for Computational Linguistics},
  pages     = {3577--3599},
  doi       = {10.18653/v1/2021.findings-emnlp.304},
  url       = {https://aclanthology.org/2021.findings-emnlp.304/},
}