% Conference paper in the CLiC-it 2025 proceedings (Ergoli, Bondielli, Lenci).
@inproceedings{ergoli-etal-2025-seeing,
  author    = {Ergoli, Salvatore and Bondielli, Alessandro and Lenci, Alessandro},
  title     = {Seeing Cause and Time: A Visually Grounded Evaluation of Multimodal Models},
  editor    = {Bosco, Cristina and Jezek, Elisabetta and Polignano, Marco and Sanguinetti, Manuela},
  booktitle = {Proceedings of the Eleventh Italian Conference on Computational Linguistics (CLiC-it 2025)},
  publisher = {CEUR Workshop Proceedings},
  address   = {Cagliari, Italy},
  month     = sep,
  year      = {2025},
  pages     = {423--433},
  isbn      = {979-12-243-0587-3},
  url       = {https://aclanthology.org/2025.clicit-1.42/},
}