% Laskar et al. (2024), "Query-OPT": paper in the EMNLP 2024 Industry Track proceedings.
% Fields are grouped as: people, titles, date, publisher details, locators.
@inproceedings{laskar-etal-2024-query,
  author    = {Laskar, Md Tahmid Rahman and Khasanova, Elena and Fu, Xue-Yong and Chen, Cheng and Tn, Shashi Bhushan},
  editor    = {Dernoncourt, Franck and Preo{\c{t}}iuc-Pietro, Daniel and Shimorina, Anastasia},
  title     = {Query-{OPT}: Optimizing Inference of Large Language Models via Multi-Query Instructions in Meeting Summarization},
  booktitle = {Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing: Industry Track},
  year      = {2024},
  month     = nov,
  publisher = {Association for Computational Linguistics},
  address   = {Miami, Florida, US},
  pages     = {1140--1151},
  doi       = {10.18653/v1/2024.emnlp-industry.86},
  url       = {https://aclanthology.org/2024.emnlp-industry.86/},
}