@inproceedings{ye-etal-2021-towards,
title = "Towards More Fine-grained and Reliable {NLP} Performance Prediction",
author = "Ye, Zihuiwen and
Liu, Pengfei and
Fu, Jinlan and
Neubig, Graham",
editor = "Merlo, Paola and
Tiedemann, Jorg and
Tsarfaty, Reut",
booktitle = "Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume",
month = apr,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.eacl-main.324",
doi = "10.18653/v1/2021.eacl-main.324",
pages = "3703--3714",
abstract = "Performance prediction, the task of estimating a system{'}s performance without performing experiments, allows us to reduce the experimental burden caused by the combinatorial explosion of different datasets, languages, tasks, and models. In this paper, we make two contributions to improving performance prediction for NLP tasks. First, we examine performance predictors not only for holistic measures of accuracy like F1 or BLEU, but also \textit{fine-grained} performance measures such as accuracy over individual classes of examples. Second, we propose methods to understand the \textit{reliability} of a performance prediction model from two angles: confidence intervals and calibration. We perform an analysis of four types of NLP tasks, and both demonstrate the feasibility of fine-grained performance prediction and the necessity to perform reliability analysis for performance prediction methods in the future.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ye-etal-2021-towards">
<titleInfo>
<title>Towards More Fine-grained and Reliable NLP Performance Prediction</title>
</titleInfo>
<name type="personal">
<namePart type="given">Zihuiwen</namePart>
<namePart type="family">Ye</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pengfei</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jinlan</namePart>
<namePart type="family">Fu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Graham</namePart>
<namePart type="family">Neubig</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume</title>
</titleInfo>
<name type="personal">
<namePart type="given">Paola</namePart>
<namePart type="family">Merlo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jorg</namePart>
<namePart type="family">Tiedemann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Reut</namePart>
<namePart type="family">Tsarfaty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Performance prediction, the task of estimating a system’s performance without performing experiments, allows us to reduce the experimental burden caused by the combinatorial explosion of different datasets, languages, tasks, and models. In this paper, we make two contributions to improving performance prediction for NLP tasks. First, we examine performance predictors not only for holistic measures of accuracy like F1 or BLEU, but also fine-grained performance measures such as accuracy over individual classes of examples. Second, we propose methods to understand the reliability of a performance prediction model from two angles: confidence intervals and calibration. We perform an analysis of four types of NLP tasks, and both demonstrate the feasibility of fine-grained performance prediction and the necessity to perform reliability analysis for performance prediction methods in the future.</abstract>
<identifier type="citekey">ye-etal-2021-towards</identifier>
<identifier type="doi">10.18653/v1/2021.eacl-main.324</identifier>
<location>
<url>https://aclanthology.org/2021.eacl-main.324</url>
</location>
<part>
<date>2021-04</date>
<extent unit="page">
<start>3703</start>
<end>3714</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Towards More Fine-grained and Reliable NLP Performance Prediction
%A Ye, Zihuiwen
%A Liu, Pengfei
%A Fu, Jinlan
%A Neubig, Graham
%Y Merlo, Paola
%Y Tiedemann, Jorg
%Y Tsarfaty, Reut
%S Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume
%D 2021
%8 April
%I Association for Computational Linguistics
%C Online
%F ye-etal-2021-towards
%X Performance prediction, the task of estimating a system’s performance without performing experiments, allows us to reduce the experimental burden caused by the combinatorial explosion of different datasets, languages, tasks, and models. In this paper, we make two contributions to improving performance prediction for NLP tasks. First, we examine performance predictors not only for holistic measures of accuracy like F1 or BLEU, but also fine-grained performance measures such as accuracy over individual classes of examples. Second, we propose methods to understand the reliability of a performance prediction model from two angles: confidence intervals and calibration. We perform an analysis of four types of NLP tasks, and both demonstrate the feasibility of fine-grained performance prediction and the necessity to perform reliability analysis for performance prediction methods in the future.
%R 10.18653/v1/2021.eacl-main.324
%U https://aclanthology.org/2021.eacl-main.324
%U https://doi.org/10.18653/v1/2021.eacl-main.324
%P 3703-3714
Markdown (Informal)
[Towards More Fine-grained and Reliable NLP Performance Prediction](https://aclanthology.org/2021.eacl-main.324) (Ye et al., EACL 2021)
ACL