% Roy, Saffar, Vaswani, Grangier: Routing Transformers article,
% Transactions of the Association for Computational Linguistics, vol. 9 (2021).
% The model name in the title is brace-protected so that bibliography styles
% which sentence-case titles do not lowercase it.
@article{roy-etal-2021-efficient,
  author    = {Roy, Aurko and Saffar, Mohammad and Vaswani, Ashish and Grangier, David},
  title     = {Efficient Content-Based Sparse Attention with {Routing Transformers}},
  editor    = {Roark, Brian and Nenkova, Ani},
  journal   = {Transactions of the Association for Computational Linguistics},
  volume    = {9},
  pages     = {53--68},
  year      = {2021},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  doi       = {10.1162/tacl_a_00353},
  url       = {https://aclanthology.org/2021.tacl-1.4/},
}