% Conference paper in the CoNLL 2023 proceedings; one field per line, brace-delimited values.
@inproceedings{sun-etal-2023-validity,
  author    = {Sun, Kaiser and Williams, Adina and Hupkes, Dieuwke},
  title     = {The Validity of Evaluation Results: Assessing Concurrence Across Compositionality Benchmarks},
  editor    = {Jiang, Jing and Reitter, David and Deng, Shumin},
  booktitle = {Proceedings of the 27th Conference on Computational Natural Language Learning (CoNLL)},
  month     = dec,
  year      = {2023},
  address   = {Singapore},
  publisher = {Association for Computational Linguistics},
  pages     = {274--293},
  doi       = {10.18653/v1/2023.conll-1.19},
  url       = {https://aclanthology.org/2023.conll-1.19/}
}