% Conference paper from the ACL 2024 long-papers volume (see booktitle, doi and url below).
@inproceedings{alizadeh-etal-2024-llm,
  author    = {Alizadeh, Keivan and
               Mirzadeh, Seyed Iman and
               Belenko, Dmitry and
               Khatamifard, S. and
               Cho, Minsik and
               Del Mundo, Carlo C and
               Rastegari, Mohammad and
               Farajtabar, Mehrdad},
  title     = {{LLM} in a flash: Efficient Large Language Model Inference with Limited Memory},
  editor    = {Ku, Lun-Wei and
               Martins, Andre and
               Srikumar, Vivek},
  booktitle = {Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  publisher = {Association for Computational Linguistics},
  address   = {Bangkok, Thailand},
  year      = {2024},
  month     = aug,
  pages     = {12562--12584},
  doi       = {10.18653/v1/2024.acl-long.678},
  url       = {https://aclanthology.org/2024.acl-long.678/},
}