@inproceedings{dan-roth-2021-effects-transformer,
title = "On the Effects of Transformer Size on In- and Out-of-Domain Calibration",
author = "Dan, Soham and
Roth, Dan",
editor = "Moens, Marie-Francine and
Huang, Xuanjing and
Specia, Lucia and
Yih, Scott Wen-tau",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2021",
month = nov,
year = "2021",
address = "Punta Cana, Dominican Republic",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.findings-emnlp.180",
doi = "10.18653/v1/2021.findings-emnlp.180",
pages = "2096--2101",
abstract = "Large, pre-trained transformer language models, which are pervasive in natural language processing tasks, are notoriously expensive to train. To reduce the cost of training such large models, prior work has developed smaller, more compact models which achieves a significant speedup in training time while maintaining competitive accuracy to the original model on downstream tasks. Though these smaller pre-trained models have been widely adopted by the community, it is not known how well are they calibrated compared to their larger counterparts. In this paper, focusing on a wide range of tasks, we thoroughly investigate the calibration properties of pre-trained transformers, as a function of their size. We demonstrate that when evaluated in-domain, smaller models are able to achieve competitive, and often better, calibration compared to larger models, while achieving significant speedup in training time. Post-hoc calibration techniques further reduce calibration error for all models in-domain. However, when evaluated out-of-domain, larger models tend to be better calibrated, and label-smoothing instead is an effective strategy to calibrate models in this setting.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="dan-roth-2021-effects-transformer">
<titleInfo>
<title>On the Effects of Transformer Size on In- and Out-of-Domain Calibration</title>
</titleInfo>
<name type="personal">
<namePart type="given">Soham</namePart>
<namePart type="family">Dan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dan</namePart>
<namePart type="family">Roth</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2021</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marie-Francine</namePart>
<namePart type="family">Moens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xuanjing</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lucia</namePart>
<namePart type="family">Specia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Scott</namePart>
<namePart type="given">Wen-tau</namePart>
<namePart type="family">Yih</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Punta Cana, Dominican Republic</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Large, pre-trained transformer language models, which are pervasive in natural language processing tasks, are notoriously expensive to train. To reduce the cost of training such large models, prior work has developed smaller, more compact models that achieve a significant speedup in training time while maintaining accuracy competitive with the original model on downstream tasks. Though these smaller pre-trained models have been widely adopted by the community, it is not known how well they are calibrated compared to their larger counterparts. In this paper, focusing on a wide range of tasks, we thoroughly investigate the calibration properties of pre-trained transformers as a function of their size. We demonstrate that, when evaluated in-domain, smaller models achieve calibration that is competitive with, and often better than, that of larger models, while providing a significant speedup in training time. Post-hoc calibration techniques further reduce calibration error for all models in-domain. However, when evaluated out-of-domain, larger models tend to be better calibrated, and label smoothing is instead an effective strategy for calibrating models in this setting.</abstract>
<identifier type="citekey">dan-roth-2021-effects-transformer</identifier>
<identifier type="doi">10.18653/v1/2021.findings-emnlp.180</identifier>
<location>
<url>https://aclanthology.org/2021.findings-emnlp.180</url>
</location>
<part>
<date>2021-11</date>
<extent unit="page">
<start>2096</start>
<end>2101</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T On the Effects of Transformer Size on In- and Out-of-Domain Calibration
%A Dan, Soham
%A Roth, Dan
%Y Moens, Marie-Francine
%Y Huang, Xuanjing
%Y Specia, Lucia
%Y Yih, Scott Wen-tau
%S Findings of the Association for Computational Linguistics: EMNLP 2021
%D 2021
%8 November
%I Association for Computational Linguistics
%C Punta Cana, Dominican Republic
%F dan-roth-2021-effects-transformer
%X Large, pre-trained transformer language models, which are pervasive in natural language processing tasks, are notoriously expensive to train. To reduce the cost of training such large models, prior work has developed smaller, more compact models that achieve a significant speedup in training time while maintaining accuracy competitive with the original model on downstream tasks. Though these smaller pre-trained models have been widely adopted by the community, it is not known how well they are calibrated compared to their larger counterparts. In this paper, focusing on a wide range of tasks, we thoroughly investigate the calibration properties of pre-trained transformers as a function of their size. We demonstrate that, when evaluated in-domain, smaller models achieve calibration that is competitive with, and often better than, that of larger models, while providing a significant speedup in training time. Post-hoc calibration techniques further reduce calibration error for all models in-domain. However, when evaluated out-of-domain, larger models tend to be better calibrated, and label smoothing is instead an effective strategy for calibrating models in this setting.
%R 10.18653/v1/2021.findings-emnlp.180
%U https://aclanthology.org/2021.findings-emnlp.180
%U https://doi.org/10.18653/v1/2021.findings-emnlp.180
%P 2096-2101
Markdown (Informal)
[On the Effects of Transformer Size on In- and Out-of-Domain Calibration](https://aclanthology.org/2021.findings-emnlp.180) (Dan & Roth, Findings 2021)
ACL
Soham Dan and Dan Roth. 2021. On the Effects of Transformer Size on In- and Out-of-Domain Calibration. In Findings of the Association for Computational Linguistics: EMNLP 2021, pages 2096–2101, Punta Cana, Dominican Republic. Association for Computational Linguistics.
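The abstract's comparisons hinge on how calibration error is measured. As a point of reference only, below is a minimal sketch of expected calibration error (ECE) with equal-width confidence bins, the standard binned metric used in calibration studies; the function name, binning choice, and toy inputs are illustrative assumptions and are not taken from the paper.

```python
import numpy as np

def expected_calibration_error(confidences, correct, n_bins=10):
    """Equal-width binned ECE: weighted average gap between mean
    confidence and empirical accuracy within each confidence bin."""
    confidences = np.asarray(confidences, dtype=float)
    correct = np.asarray(correct, dtype=float)
    bin_edges = np.linspace(0.0, 1.0, n_bins + 1)
    ece = 0.0
    for lo, hi in zip(bin_edges[:-1], bin_edges[1:]):
        in_bin = (confidences > lo) & (confidences <= hi)
        if in_bin.any():
            gap = abs(correct[in_bin].mean() - confidences[in_bin].mean())
            ece += in_bin.mean() * gap  # weight by fraction of examples in the bin
    return ece

# Toy usage: per-example top-class confidences and 0/1 correctness flags.
conf = [0.95, 0.80, 0.65, 0.99, 0.55]
hit = [1, 1, 0, 1, 0]
print(f"ECE = {expected_calibration_error(conf, hit):.3f}")
```

A lower ECE means the model's confidence scores track its actual accuracy more closely, which is the sense in which the paper compares smaller and larger transformers in- and out-of-domain.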