@inproceedings{razeghi-etal-2022-snoopy,
    title = "Snoopy: An Online Interface for Exploring the Effect of Pretraining Term Frequencies on Few-Shot {LM} Performance",
    author = "Razeghi, Yasaman and
      Mekala, Raja Sekhar Reddy and
      Logan IV, Robert L. and
      Gardner, Matt and
      Singh, Sameer",
    editor = "Che, Wanxiang and
      Shutova, Ekaterina",
    booktitle = "Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing: System Demonstrations",
    month = dec,
    year = "2022",
    address = "Abu Dhabi, UAE",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2022.emnlp-demos.39",
    doi = "10.18653/v1/2022.emnlp-demos.39",
    pages = "389--395",
    abstract = "Current evaluation schemes for large language models often fail to consider the impact of the overlap between the pretraining corpus and test data on model performance statistics. Snoopy is an online interface that allows researchers to study this impact in few-shot learning settings. Our demo provides term frequency statistics for the Pile, an 800 GB corpus, accompanied by the precomputed performance of EleutherAI/GPT models on more than 20 NLP benchmarks, including numerical, commonsense reasoning, natural language understanding, and question-answering tasks. Snoopy allows a user to interactively align specific terms in test instances with their frequency in the Pile, enabling exploratory analysis of how term frequency is related to the accuracy of the models, patterns that are hard to discover through automated means. A user can look at correlations over various model sizes and numbers of in-context examples and visualize the results across multiple (potentially aggregated) datasets. Using Snoopy, we show that a researcher can quickly replicate prior analyses for numerical tasks and also carry out much more expansive exploration that was previously challenging. Snoopy is available at \url{https://nlp.ics.uci.edu/snoopy}.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="razeghi-etal-2022-snoopy">
    <titleInfo>
      <title>Snoopy: An Online Interface for Exploring the Effect of Pretraining Term Frequencies on Few-Shot LM Performance</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Yasaman</namePart>
      <namePart type="family">Razeghi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Raja</namePart>
      <namePart type="given">Sekhar</namePart>
      <namePart type="given">Reddy</namePart>
      <namePart type="family">Mekala</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Robert</namePart>
      <namePart type="given">L.</namePart>
      <namePart type="family">Logan IV</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Matt</namePart>
      <namePart type="family">Gardner</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sameer</namePart>
      <namePart type="family">Singh</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2022-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing: System Demonstrations</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Wanxiang</namePart>
        <namePart type="family">Che</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ekaterina</namePart>
        <namePart type="family">Shutova</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Abu Dhabi, UAE</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Current evaluation schemes for large language models often fail to consider the impact of the overlap between the pretraining corpus and test data on model performance statistics. Snoopy is an online interface that allows researchers to study this impact in few-shot learning settings. Our demo provides term frequency statistics for the Pile, an 800 GB corpus, accompanied by the precomputed performance of EleutherAI/GPT models on more than 20 NLP benchmarks, including numerical, commonsense reasoning, natural language understanding, and question-answering tasks. Snoopy allows a user to interactively align specific terms in test instances with their frequency in the Pile, enabling exploratory analysis of how term frequency is related to the accuracy of the models, patterns that are hard to discover through automated means. A user can look at correlations over various model sizes and numbers of in-context examples and visualize the results across multiple (potentially aggregated) datasets. Using Snoopy, we show that a researcher can quickly replicate prior analyses for numerical tasks and also carry out much more expansive exploration that was previously challenging. Snoopy is available at https://nlp.ics.uci.edu/snoopy.</abstract>
    <identifier type="citekey">razeghi-etal-2022-snoopy</identifier>
    <identifier type="doi">10.18653/v1/2022.emnlp-demos.39</identifier>
    <location>
      <url>https://aclanthology.org/2022.emnlp-demos.39</url>
    </location>
    <part>
      <date>2022-12</date>
      <extent unit="page">
        <start>389</start>
        <end>395</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Snoopy: An Online Interface for Exploring the Effect of Pretraining Term Frequencies on Few-Shot LM Performance
%A Razeghi, Yasaman
%A Mekala, Raja Sekhar Reddy
%A Logan IV, Robert L.
%A Gardner, Matt
%A Singh, Sameer
%Y Che, Wanxiang
%Y Shutova, Ekaterina
%S Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing: System Demonstrations
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, UAE
%F razeghi-etal-2022-snoopy
%X Current evaluation schemes for large language models often fail to consider the impact of the overlap between the pretraining corpus and test data on model performance statistics. Snoopy is an online interface that allows researchers to study this impact in few-shot learning settings. Our demo provides term frequency statistics for the Pile, an 800 GB corpus, accompanied by the precomputed performance of EleutherAI/GPT models on more than 20 NLP benchmarks, including numerical, commonsense reasoning, natural language understanding, and question-answering tasks. Snoopy allows a user to interactively align specific terms in test instances with their frequency in the Pile, enabling exploratory analysis of how term frequency is related to the accuracy of the models, patterns that are hard to discover through automated means. A user can look at correlations over various model sizes and numbers of in-context examples and visualize the results across multiple (potentially aggregated) datasets. Using Snoopy, we show that a researcher can quickly replicate prior analyses for numerical tasks and also carry out much more expansive exploration that was previously challenging. Snoopy is available at https://nlp.ics.uci.edu/snoopy.
%R 10.18653/v1/2022.emnlp-demos.39
%U https://aclanthology.org/2022.emnlp-demos.39
%U https://doi.org/10.18653/v1/2022.emnlp-demos.39
%P 389-395
Markdown (Informal)
[Snoopy: An Online Interface for Exploring the Effect of Pretraining Term Frequencies on Few-Shot LM Performance](https://aclanthology.org/2022.emnlp-demos.39) (Razeghi et al., EMNLP 2022)
ACL
Yasaman Razeghi, Raja Sekhar Reddy Mekala, Robert L. Logan IV, Matt Gardner, and Sameer Singh. 2022. [Snoopy: An Online Interface for Exploring the Effect of Pretraining Term Frequencies on Few-Shot LM Performance](https://aclanthology.org/2022.emnlp-demos.39). In *Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing: System Demonstrations*, pages 389–395, Abu Dhabi, UAE. Association for Computational Linguistics.