@inproceedings{kranzlein-etal-2021-making-heads,
title = "Making Heads and Tails of Models with Marginal Calibration for Sparse Tagsets",
author = "Kranzlein, Michael and
Liu, Nelson F. and
Schneider, Nathan",
editor = "Moens, Marie-Francine and
Huang, Xuanjing and
Specia, Lucia and
Yih, Scott Wen-tau",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2021",
month = nov,
year = "2021",
address = "Punta Cana, Dominican Republic",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.findings-emnlp.423",
doi = "10.18653/v1/2021.findings-emnlp.423",
pages = "4919--4928",
abstract = "For interpreting the behavior of a probabilistic model, it is useful to measure a model{'}s calibration{---}the extent to which it produces reliable confidence scores. We address the open problem of calibration for tagging models with sparse tagsets, and recommend strategies to measure and reduce calibration error (CE) in such models. We show that several post-hoc recalibration techniques all reduce calibration error across the marginal distribution for two existing sequence taggers. Moreover, we propose tag frequency grouping (TFG) as a way to measure calibration error in different frequency bands. Further, recalibrating each group separately promotes a more equitable reduction of calibration error across the tag frequency spectrum.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kranzlein-etal-2021-making-heads">
<titleInfo>
<title>Making Heads and Tails of Models with Marginal Calibration for Sparse Tagsets</title>
</titleInfo>
<name type="personal">
<namePart type="given">Michael</namePart>
<namePart type="family">Kranzlein</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nelson</namePart>
<namePart type="given">F</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nathan</namePart>
<namePart type="family">Schneider</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2021</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marie-Francine</namePart>
<namePart type="family">Moens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xuanjing</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lucia</namePart>
<namePart type="family">Specia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Scott</namePart>
<namePart type="given">Wen-tau</namePart>
<namePart type="family">Yih</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Punta Cana, Dominican Republic</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>For interpreting the behavior of a probabilistic model, it is useful to measure a model’s calibration—the extent to which it produces reliable confidence scores. We address the open problem of calibration for tagging models with sparse tagsets, and recommend strategies to measure and reduce calibration error (CE) in such models. We show that several post-hoc recalibration techniques all reduce calibration error across the marginal distribution for two existing sequence taggers. Moreover, we propose tag frequency grouping (TFG) as a way to measure calibration error in different frequency bands. Further, recalibrating each group separately promotes a more equitable reduction of calibration error across the tag frequency spectrum.</abstract>
<identifier type="citekey">kranzlein-etal-2021-making-heads</identifier>
<identifier type="doi">10.18653/v1/2021.findings-emnlp.423</identifier>
<location>
<url>https://aclanthology.org/2021.findings-emnlp.423</url>
</location>
<part>
<date>2021-11</date>
<extent unit="page">
<start>4919</start>
<end>4928</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Making Heads and Tails of Models with Marginal Calibration for Sparse Tagsets
%A Kranzlein, Michael
%A Liu, Nelson F.
%A Schneider, Nathan
%Y Moens, Marie-Francine
%Y Huang, Xuanjing
%Y Specia, Lucia
%Y Yih, Scott Wen-tau
%S Findings of the Association for Computational Linguistics: EMNLP 2021
%D 2021
%8 November
%I Association for Computational Linguistics
%C Punta Cana, Dominican Republic
%F kranzlein-etal-2021-making-heads
%X For interpreting the behavior of a probabilistic model, it is useful to measure a model’s calibration—the extent to which it produces reliable confidence scores. We address the open problem of calibration for tagging models with sparse tagsets, and recommend strategies to measure and reduce calibration error (CE) in such models. We show that several post-hoc recalibration techniques all reduce calibration error across the marginal distribution for two existing sequence taggers. Moreover, we propose tag frequency grouping (TFG) as a way to measure calibration error in different frequency bands. Further, recalibrating each group separately promotes a more equitable reduction of calibration error across the tag frequency spectrum.
%R 10.18653/v1/2021.findings-emnlp.423
%U https://aclanthology.org/2021.findings-emnlp.423
%U https://doi.org/10.18653/v1/2021.findings-emnlp.423
%P 4919-4928
Markdown (Informal)
[Making Heads and Tails of Models with Marginal Calibration for Sparse Tagsets](https://aclanthology.org/2021.findings-emnlp.423) (Kranzlein et al., Findings 2021)
ACL