% Conference paper entry; the url field points at ACL Anthology record 2025.ranlp-1.21.
% The title protects the product name as one whole braced word so that
% sentence-casing styles keep its capitals without mid-word brace groups.
% NOTE(review): the second author is entered with surname "Achatius Galke" and
% given names "Lukas Paul"; confirm the surname is not just "Galke".
% NOTE(review): address presumably holds the conference location, and the
% publisher string also embeds a city; both are kept exactly as exported.
@inproceedings{blasi-nunez-etal-2025-mldataforge,
  author    = {Blasi N{\'u}{\~n}ez, Andrea and Achatius Galke, Lukas Paul and Schneider-Kamp, Peter},
  title     = {{MLDataForge}: Accelerating Large-Scale Dataset Preprocessing and Access for Multimodal Foundation Model Training},
  editor    = {Angelova, Galia and Kunilovskaya, Maria and Escribe, Marie and Mitkov, Ruslan},
  booktitle = {Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative {AI} Era},
  month     = sep,
  year      = {2025},
  address   = {Varna, Bulgaria},
  publisher = {INCOMA Ltd., Shoumen, Bulgaria},
  pages     = {175--183},
  url       = {https://aclanthology.org/2025.ranlp-1.21/},
}