% COLING 2016 technical paper by Cevahir and Murakami on large-scale product categorization.
% The month uses the predefined macro and the percent sign in the abstract is escaped for LaTeX.
@InProceedings{cevahir-murakami:2016:COLING,
  author    = {Cevahir, Ali and Murakami, Koji},
  title     = {Large-scale Multi-class and Hierarchical Product Categorization for an E-commerce Giant},
  booktitle = {Proceedings of {COLING} 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = dec,
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {525--535},
  abstract  = {In order to organize the large number of products listed in e-commerce sites,
	each product is usually assigned to one of the multi-level categories in the
	taxonomy tree. It is a time-consuming and difficult task for merchants to
	select proper categories within thousands of options for the products they
	sell. In this work, we propose an automatic classification tool to predict the
	matching category for a given product title and description. We used a
	combination of two different neural models, i.e., deep belief nets and deep
	autoencoders, for both titles and descriptions. We implemented a selective
	reconstruction approach for the input layer during the training of the deep
	neural networks, in order to scale-out for large-sized sparse feature vectors.
	GPUs are utilized in order to train neural networks in a reasonable time. We
	have trained our models for around 150 million products with a taxonomy tree
	with at most 5 levels that contains 28,338 leaf categories. Tests with millions
	of products show that our first predictions matches 81\% of merchants'
	assignments, when "others" categories are excluded.},
  url       = {http://aclweb.org/anthology/C16-1051}
}

