@inproceedings{longpre-etal-2024-pretrainers, title = "A Pretrainer`s Guide to Training Data: Measuring the Effects of Data Age, Domain Coverage, Quality, {\&} Toxicity", author = "Longpre, Shayne and Yauney, Gregory and Reif, Emily and Lee, Katherine and Roberts, Adam and Zoph, Barret and Zhou, Denny and Wei, Jason and Robinson, Kevin and Mimno, David and Ippolito, Daphne", editor = "Duh, Kevin and Gomez, Helena and Bethard, Steven", booktitle = "Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)", month = jun, year = "2024", address = "Mexico City, Mexico", publisher = "Association for Computational Linguistics", url = "https://aclanthology.org/2024.naacl-long.179/", doi = "10.18653/v1/2024.naacl-long.179", pages = "3245--3276" }