% ACL 2018 short paper (Volume 2), ACL Anthology ID P18-2095.
% month uses the predefined BibTeX macro; {Blending} is braced so sentence-casing
% styles keep the capital letter that follows the question mark.
@InProceedings{shnarch-EtAl:2018:Short,
  author    = {Shnarch, Eyal and Alzate, Carlos and Dankin, Lena and Gleize, Martin and Hou, Yufang and Choshen, Leshem and Aharonov, Ranit and Slonim, Noam},
  title     = {Will it Blend? {Blending} Weak and Strong Labeled Data in a Neural Network for Argumentation Mining},
  booktitle = {Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)},
  month     = jul,
  year      = {2018},
  address   = {Melbourne, Australia},
  publisher = {Association for Computational Linguistics},
  pages     = {599--605},
  abstract  = {The process of obtaining high quality labeled data for natural language understanding tasks is often slow, error-prone, complicated and expensive. With the vast usage of neural networks, this issue becomes more notorious since these networks require a large amount of labeled data to produce satisfactory results. We propose a methodology to blend high quality but scarce strong labeled data with noisy but abundant weak labeled data during the training of neural networks. Experiments in the context of topic-dependent evidence detection with two forms of weak labeled data show the advantages of the blending scheme. In addition, we provide a manually annotated data set for the task of topic-dependent evidence detection. We believe that blending weak and strong labeled data is a general notion that may be applicable to many language understanding tasks, and can especially assist researchers who wish to train a network but have a small amount of high quality labeled data for their task of interest.},
  doi       = {10.18653/v1/P18-2095},
  url       = {http://www.aclweb.org/anthology/P18-2095},
}

