@inproceedings{keith-oconnor-2018-uncertainty,
title = "Uncertainty-aware generative models for inferring document class prevalence",
author = "Keith, Katherine and
O{'}Connor, Brendan",
editor = "Riloff, Ellen and
Chiang, David and
Hockenmaier, Julia and
Tsujii, Jun{'}ichi",
booktitle = "Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing",
month = oct # "-" # nov,
year = "2018",
address = "Brussels, Belgium",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/D18-1487",
doi = "10.18653/v1/D18-1487",
pages = "4575--4585",
abstract = "Prevalence estimation is the task of inferring the relative frequency of classes of unlabeled examples in a group{---}for example, the proportion of a document collection with positive sentiment. Previous work has focused on aggregating and adjusting discriminative individual classifiers to obtain prevalence point estimates. But imperfect classifier accuracy ought to be reflected in uncertainty over the predicted prevalence for scientifically valid inference. In this work, we present (1) a generative probabilistic modeling approach to prevalence estimation, and (2) the construction and evaluation of prevalence confidence intervals; in particular, we demonstrate that an off-the-shelf discriminative classifier can be given a generative re-interpretation, by backing out an implicit individual-level likelihood function, which can be used to conduct fast and simple group-level Bayesian inference. Empirically, we demonstrate our approach provides better confidence interval coverage than an alternative, and is dramatically more robust to shifts in the class prior between training and testing.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="keith-oconnor-2018-uncertainty">
<titleInfo>
<title>Uncertainty-aware generative models for inferring document class prevalence</title>
</titleInfo>
<name type="personal">
<namePart type="given">Katherine</namePart>
<namePart type="family">Keith</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Brendan</namePart>
<namePart type="family">O’Connor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-oct-nov</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ellen</namePart>
<namePart type="family">Riloff</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Chiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Julia</namePart>
<namePart type="family">Hockenmaier</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jun’ichi</namePart>
<namePart type="family">Tsujii</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Brussels, Belgium</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Prevalence estimation is the task of inferring the relative frequency of classes of unlabeled examples in a group—for example, the proportion of a document collection with positive sentiment. Previous work has focused on aggregating and adjusting discriminative individual classifiers to obtain prevalence point estimates. But imperfect classifier accuracy ought to be reflected in uncertainty over the predicted prevalence for scientifically valid inference. In this work, we present (1) a generative probabilistic modeling approach to prevalence estimation, and (2) the construction and evaluation of prevalence confidence intervals; in particular, we demonstrate that an off-the-shelf discriminative classifier can be given a generative re-interpretation, by backing out an implicit individual-level likelihood function, which can be used to conduct fast and simple group-level Bayesian inference. Empirically, we demonstrate our approach provides better confidence interval coverage than an alternative, and is dramatically more robust to shifts in the class prior between training and testing.</abstract>
<identifier type="citekey">keith-oconnor-2018-uncertainty</identifier>
<identifier type="doi">10.18653/v1/D18-1487</identifier>
<location>
<url>https://aclanthology.org/D18-1487</url>
</location>
<part>
<date>2018-oct-nov</date>
<extent unit="page">
<start>4575</start>
<end>4585</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Uncertainty-aware generative models for inferring document class prevalence
%A Keith, Katherine
%A O’Connor, Brendan
%Y Riloff, Ellen
%Y Chiang, David
%Y Hockenmaier, Julia
%Y Tsujii, Jun’ichi
%S Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing
%D 2018
%8 oct nov
%I Association for Computational Linguistics
%C Brussels, Belgium
%F keith-oconnor-2018-uncertainty
%X Prevalence estimation is the task of inferring the relative frequency of classes of unlabeled examples in a group—for example, the proportion of a document collection with positive sentiment. Previous work has focused on aggregating and adjusting discriminative individual classifiers to obtain prevalence point estimates. But imperfect classifier accuracy ought to be reflected in uncertainty over the predicted prevalence for scientifically valid inference. In this work, we present (1) a generative probabilistic modeling approach to prevalence estimation, and (2) the construction and evaluation of prevalence confidence intervals; in particular, we demonstrate that an off-the-shelf discriminative classifier can be given a generative re-interpretation, by backing out an implicit individual-level likelihood function, which can be used to conduct fast and simple group-level Bayesian inference. Empirically, we demonstrate our approach provides better confidence interval coverage than an alternative, and is dramatically more robust to shifts in the class prior between training and testing.
%R 10.18653/v1/D18-1487
%U https://aclanthology.org/D18-1487
%U https://doi.org/10.18653/v1/D18-1487
%P 4575-4585
Markdown (Informal)
[Uncertainty-aware generative models for inferring document class prevalence](https://aclanthology.org/D18-1487) (Keith & O’Connor, EMNLP 2018)
ACL