@inproceedings{kim-etal-2023-bag,
title = "Bag of Tricks for In-Distribution Calibration of Pretrained Transformers",
author = "Kim, Jaeyoung and
Na, Dongbin and
Choi, Sungchul and
Lim, Sungbin",
editor = "Vlachos, Andreas and
Augenstein, Isabelle",
booktitle = "Findings of the Association for Computational Linguistics: EACL 2023",
month = may,
year = "2023",
address = "Dubrovnik, Croatia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.findings-eacl.40",
doi = "10.18653/v1/2023.findings-eacl.40",
pages = "551--563",
abstract = "While pre-trained language models (PLMs) have become a de-facto standard promoting the accuracy of text classification tasks, recent studies find that PLMs often predict over-confidently. Although calibration methods have been proposed, such as ensemble learning and data augmentation, most of the methods have been verified in computer vision benchmarks rather than in PLM-based text classification tasks. In this paper, we present an empirical study on confidence calibration for PLMs, addressing three categories, including confidence penalty losses, data augmentations, and ensemble methods. We find that the ensemble model overfitted to the training set shows sub-par calibration performance and also observe that PLMs trained with confidence penalty loss have a trade-off between calibration and accuracy. Building on these observations, we propose the Calibrated PLM (CALL), a combination of calibration techniques. The CALL complements shortcomings that may occur when utilizing a calibration method individually and boosts both classification and calibration accuracy. Design choices in CALL{'}s training procedures are extensively studied, and we provide a detailed analysis of how calibration techniques affect the calibration performance of PLMs.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kim-etal-2023-bag">
<titleInfo>
<title>Bag of Tricks for In-Distribution Calibration of Pretrained Transformers</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jaeyoung</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dongbin</namePart>
<namePart type="family">Na</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sungchul</namePart>
<namePart type="family">Choi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sungbin</namePart>
<namePart type="family">Lim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EACL 2023</title>
</titleInfo>
<name type="personal">
<namePart type="given">Andreas</namePart>
<namePart type="family">Vlachos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Isabelle</namePart>
<namePart type="family">Augenstein</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dubrovnik, Croatia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>While pre-trained language models (PLMs) have become a de-facto standard promoting the accuracy of text classification tasks, recent studies find that PLMs often predict over-confidently. Although calibration methods have been proposed, such as ensemble learning and data augmentation, most of the methods have been verified in computer vision benchmarks rather than in PLM-based text classification tasks. In this paper, we present an empirical study on confidence calibration for PLMs, addressing three categories, including confidence penalty losses, data augmentations, and ensemble methods. We find that the ensemble model overfitted to the training set shows sub-par calibration performance and also observe that PLMs trained with confidence penalty loss have a trade-off between calibration and accuracy. Building on these observations, we propose the Calibrated PLM (CALL), a combination of calibration techniques. The CALL complements shortcomings that may occur when utilizing a calibration method individually and boosts both classification and calibration accuracy. Design choices in CALL’s training procedures are extensively studied, and we provide a detailed analysis of how calibration techniques affect the calibration performance of PLMs.</abstract>
<identifier type="citekey">kim-etal-2023-bag</identifier>
<identifier type="doi">10.18653/v1/2023.findings-eacl.40</identifier>
<location>
<url>https://aclanthology.org/2023.findings-eacl.40</url>
</location>
<part>
<date>2023-05</date>
<extent unit="page">
<start>551</start>
<end>563</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Bag of Tricks for In-Distribution Calibration of Pretrained Transformers
%A Kim, Jaeyoung
%A Na, Dongbin
%A Choi, Sungchul
%A Lim, Sungbin
%Y Vlachos, Andreas
%Y Augenstein, Isabelle
%S Findings of the Association for Computational Linguistics: EACL 2023
%D 2023
%8 May
%I Association for Computational Linguistics
%C Dubrovnik, Croatia
%F kim-etal-2023-bag
%X While pre-trained language models (PLMs) have become a de-facto standard promoting the accuracy of text classification tasks, recent studies find that PLMs often predict over-confidently. Although calibration methods have been proposed, such as ensemble learning and data augmentation, most of the methods have been verified in computer vision benchmarks rather than in PLM-based text classification tasks. In this paper, we present an empirical study on confidence calibration for PLMs, addressing three categories, including confidence penalty losses, data augmentations, and ensemble methods. We find that the ensemble model overfitted to the training set shows sub-par calibration performance and also observe that PLMs trained with confidence penalty loss have a trade-off between calibration and accuracy. Building on these observations, we propose the Calibrated PLM (CALL), a combination of calibration techniques. The CALL complements shortcomings that may occur when utilizing a calibration method individually and boosts both classification and calibration accuracy. Design choices in CALL’s training procedures are extensively studied, and we provide a detailed analysis of how calibration techniques affect the calibration performance of PLMs.
%R 10.18653/v1/2023.findings-eacl.40
%U https://aclanthology.org/2023.findings-eacl.40
%U https://doi.org/10.18653/v1/2023.findings-eacl.40
%P 551-563
Markdown (Informal)
[Bag of Tricks for In-Distribution Calibration of Pretrained Transformers](https://aclanthology.org/2023.findings-eacl.40) (Kim et al., Findings 2023)
ACL