% Conference paper (Volume 1: Long Papers, 2017) introducing a dataset for
% neural natural language interfaces to databases. The acronym in the title
% is brace-protected so sentence-casing styles keep it uppercase, and the
% month uses the predefined three-letter macro so styles can format it.
@InProceedings{brad-EtAl:2017:I17-1,
  author    = {Brad, Florin and Iacob, Radu Cristian Alexandru and Hosu, Ionel Alexandru and Rebedea, Traian},
  title     = {Dataset for a Neural Natural Language Interface for Databases ({NNLIDB})},
  booktitle = {Proceedings of the Eighth International Joint Conference on Natural Language Processing (Volume 1: Long Papers)},
  month     = nov,
  year      = {2017},
  address   = {Taipei, Taiwan},
  publisher = {Asian Federation of Natural Language Processing},
  pages     = {906--914},
  abstract  = {Progress in natural language interfaces to databases (NLIDB) has been slow
               mainly due to linguistic issues (such as language ambiguity) and domain
               portability. Moreover, the lack of a large corpus to be used as a standard
               benchmark has made data-driven approaches difficult to develop and compare. In
               this paper, we revisit the problem of NLIDBs and recast it as a sequence
               translation problem. To this end, we introduce a large dataset extracted from
               the Stack Exchange Data Explorer website, which can be used for training neural
               natural language interfaces for databases. We also report encouraging baseline
               results on a smaller manually annotated test corpus, obtained using an
               attention-based sequence-to-sequence neural network.},
  url       = {http://www.aclweb.org/anthology/I17-1091},
}