@inproceedings{zhu-etal-2022-predicting,
title = "Predicting Fine-Tuning Performance with Probing",
author = "Zhu, Zining and
Shahtalebi, Soroosh and
Rudzicz, Frank",
editor = "Goldberg, Yoav and
Kozareva, Zornitsa and
Zhang, Yue",
booktitle = "Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.emnlp-main.793",
doi = "10.18653/v1/2022.emnlp-main.793",
pages = "11534--11547",
abstract = "Large NLP models have recently shown impressive performance in language understanding tasks, typically evaluated by their fine-tuned performance. Alternatively, probing has received increasing attention as being a lightweight method for interpreting the intrinsic mechanisms of large NLP models. In probing, post-hoc classifiers are trained on {``}out-of-domain{''} datasets that diagnose specific abilities. While probing the language models has led to insightful findings, they appear disjointed from the development of models. This paper explores the utility of probing deep NLP models to extract a proxy signal widely used in model development {--} the fine-tuning performance. We find that it is possible to use the accuracies of only three probing tests to predict the fine-tuning performance with errors 40{\%} - 80{\%} smaller than baselines. We further discuss possible avenues where probing can empower the development of deep NLP models.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhu-etal-2022-predicting">
<titleInfo>
<title>Predicting Fine-Tuning Performance with Probing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Zining</namePart>
<namePart type="family">Zhu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Soroosh</namePart>
<namePart type="family">Shahtalebi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Frank</namePart>
<namePart type="family">Rudzicz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yoav</namePart>
<namePart type="family">Goldberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zornitsa</namePart>
<namePart type="family">Kozareva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yue</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, United Arab Emirates</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Large NLP models have recently shown impressive performance in language understanding tasks, typically evaluated by their fine-tuned performance. Alternatively, probing has received increasing attention as being a lightweight method for interpreting the intrinsic mechanisms of large NLP models. In probing, post-hoc classifiers are trained on “out-of-domain” datasets that diagnose specific abilities. While probing the language models has led to insightful findings, they appear disjointed from the development of models. This paper explores the utility of probing deep NLP models to extract a proxy signal widely used in model development – the fine-tuning performance. We find that it is possible to use the accuracies of only three probing tests to predict the fine-tuning performance with errors 40% - 80% smaller than baselines. We further discuss possible avenues where probing can empower the development of deep NLP models.</abstract>
<identifier type="citekey">zhu-etal-2022-predicting</identifier>
<identifier type="doi">10.18653/v1/2022.emnlp-main.793</identifier>
<location>
<url>https://aclanthology.org/2022.emnlp-main.793</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>11534</start>
<end>11547</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Predicting Fine-Tuning Performance with Probing
%A Zhu, Zining
%A Shahtalebi, Soroosh
%A Rudzicz, Frank
%Y Goldberg, Yoav
%Y Kozareva, Zornitsa
%Y Zhang, Yue
%S Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates
%F zhu-etal-2022-predicting
%X Large NLP models have recently shown impressive performance in language understanding tasks, typically evaluated by their fine-tuned performance. Alternatively, probing has received increasing attention as being a lightweight method for interpreting the intrinsic mechanisms of large NLP models. In probing, post-hoc classifiers are trained on “out-of-domain” datasets that diagnose specific abilities. While probing the language models has led to insightful findings, they appear disjointed from the development of models. This paper explores the utility of probing deep NLP models to extract a proxy signal widely used in model development – the fine-tuning performance. We find that it is possible to use the accuracies of only three probing tests to predict the fine-tuning performance with errors 40% - 80% smaller than baselines. We further discuss possible avenues where probing can empower the development of deep NLP models.
%R 10.18653/v1/2022.emnlp-main.793
%U https://aclanthology.org/2022.emnlp-main.793
%U https://doi.org/10.18653/v1/2022.emnlp-main.793
%P 11534-11547
Markdown (Informal)
[Predicting Fine-Tuning Performance with Probing](https://aclanthology.org/2022.emnlp-main.793) (Zhu et al., EMNLP 2022)
ACL
- Zining Zhu, Soroosh Shahtalebi, and Frank Rudzicz. 2022. Predicting Fine-Tuning Performance with Probing. In Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing, pages 11534–11547, Abu Dhabi, United Arab Emirates. Association for Computational Linguistics.