% Conference paper by Jin, Wanvarie and Le (2017); abstract kept as exported.
% NOTE(review): the model name on the "GE1 ... MNB1" line of the abstract looks
% garbled by extraction (a stray circumflex accent) -- confirm against the
% published abstract before relying on it.
@InProceedings{jin-wanvarie-le:2017:I17-1,
  author    = {Jin, Yiping and Wanvarie, Dittaya and Le, Phu},
  title     = {Combining Lightly-Supervised Text Classification Models for Accurate Contextual Advertising},
  booktitle = {Proceedings of the Eighth International Joint Conference on Natural Language Processing (Volume 1: Long Papers)},
  month     = nov,
  year      = {2017},
  address   = {Taipei, Taiwan},
  publisher = {Asian Federation of Natural Language Processing},
  pages     = {545--554},
  abstract  = {In this paper we propose a lightly-supervised framework to rapidly build text
    classifiers for contextual advertising. Traditionally text classification
    techniques require labeled training documents for each predefined class. In the
    scenario of contextual advertising, advertisers often want to target to a
    specific class of webpages most relevant to their product or service, which may
    not be covered by a pre-trained classifier. Moreover, the advertisers are
    interested in whether a webpage is ``relevant'' or ``irrelevant''. It is
    time-consuming to solicit the advertisers for reliable training signals for the
    negative class. Therefore, it is more suitable to model the problem as a
    one-class classification problem, in contrast to traditional classification
    problems where disjoint classes are defined a priori.
    We first apply two state-of-the-art lightly-supervised classification models,
    generalized expectation (GE) criteria (Druck et al., 2008) and multinomial
    naive Bayes (MNB) with priors (Settles, 2011) to one-class classification where
    the user only needs to provide a small list of labeled words for the target
    class. To combine the strengths of the two models, we fuse them together by
    using MNB to automatically enrich the constraints for GE training. We also
    explore ensemble method to combine classifiers. On a corpus of webpages from
    real-time bidding requests, the proposed GE1\^{} MNB1 model achieves the highest
    average F1 of 0.69 and closes more than half of the gap between previous
    state-of-the-art lightly-supervised models to a fully-supervised MaxEnt model.},
  url       = {http://www.aclweb.org/anthology/I17-1055}
}

