% Workshop paper; ACL Anthology ID W17-2907 (see the url field).
% month uses the standard three-letter macro so the bibliography style
% decides how the month is rendered.
@InProceedings{zielinski-mutschke:2017:NLPandCSS,
  author    = {Zielinski, Andrea and Mutschke, Peter},
  title     = {Mining Social Science Publications for Survey Variables},
  booktitle = {Proceedings of the Second Workshop on {NLP} and Computational Social Science},
  month     = aug,
  year      = {2017},
  address   = {Vancouver, Canada},
  publisher = {Association for Computational Linguistics},
  pages     = {47--52},
  abstract  = {Research in Social Science is usually based on survey data where
               individual research questions relate to observable concepts
               (variables). However, due to a lack of standards for data
               citations a reliable identification of the variables used is
               often difficult. In this paper, we present a work-in-progress
               study that seeks to provide a solution to the variable detection
               task based on supervised machine learning algorithms, using a
               linguistic analysis pipeline to extract a rich feature set,
               including terminological concepts and similarity metric scores.
               Further, we present preliminary results on a small dataset that
               has been specifically designed for this task, yielding a
               significant increase in performance over the random baseline.},
  url       = {http://www.aclweb.org/anthology/W17-2907},
}

