% ALR12 workshop paper introducing the VSoLSCSum dataset.
% The month uses the standard `dec` macro so styles can localise/abbreviate it;
% case-sensitive words in the title are braced to survive sentence-casing styles.
@InProceedings{nguyen-EtAl:2016:ALR12,
  author    = {Nguyen, Minh-Tien and Lai, Dac Viet and Do, Phong-Khac and Tran, Duc-Vu and Nguyen, Minh-Le},
  title     = {{VSoLSCSum}: Building a {Vietnamese} Sentence-Comment Dataset for Social Context Summarization},
  booktitle = {Proceedings of the 12th Workshop on Asian Language Resources (ALR12)},
  month     = dec,
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {38--48},
  abstract  = {This paper presents VSoLSCSum, a Vietnamese linked sentence-comment dataset,
	which was manually created to treat the lack of standard corpora for social
	context summarization in
	Vietnamese. The dataset was collected through the keywords of 141 Web documents
	in 12 special events, which were mentioned on Vietnamese Web pages. Social
	users were asked to involve in creating standard summaries and the label of
	each sentence or comment. The inter-agreement calculated by Cohen's Kappa among
	raters after validating is 0.685. To illustrate the potential use of our
	dataset, a learning to rank method was trained by using a set of local and
	social features. Experimental results indicate that the summary model trained
	on our dataset outperforms state-of-the-art baselines in both ROUGE-1 and
	ROUGE-2 in social context summarization.},
  url       = {http://aclweb.org/anthology/W16-5405}
}

