% Workshop paper record (see the url field for the landing page).
% One field per line; values are brace-delimited and unchanged in content.
@inproceedings{bunn-etal-2025-fine,
  author    = {Bunn, Alec and Wiegreffe, Sarah and Bogin, Ben},
  title     = {Fine-Tune on the Format: First Improving Multiple-Choice Evaluation for Intermediate {LLM} Checkpoints},
  editor    = {Arviv, Ofir and Clinciu, Miruna and Dhole, Kaustubh and Dror, Rotem and Gehrmann, Sebastian and Habba, Eliya and Itzhak, Itay and Mille, Simon and Perlitz, Yotam and Santus, Enrico and Sedoc, Jo{\~a}o and Shmueli Scheuer, Michal and Stanovsky, Gabriel and Tafjord, Oyvind},
  booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics (GEM{\texttwosuperior})},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria and virtual meeting},
  publisher = {Association for Computational Linguistics},
  pages     = {511--521},
  url       = {https://aclanthology.org/2025.gem-1.46/},
  isbn      = {979-8-89176-261-9},
}