% OCR++ paper from the COLING 2016 technical-papers proceedings.
% Percent signs in the abstract are escaped so that styles which typeset the
% abstract do not treat them as TeX comment starts; the title acronym is
% brace-protected against sentence-casing styles.
@InProceedings{singh-EtAl:2016:COLING2,
  author    = {Singh, Mayank and Barua, Barnopriyo and Palod, Priyank and Garg, Manvi and Satapathy, Sidhartha and Bushi, Samuel and Ayush, Kumar and Sai Rohith, Krishna and Gamidi, Tulasi and Goyal, Pawan and Mukherjee, Animesh},
  title     = {{OCR++}: A Robust Framework For Information Extraction from Scholarly Articles},
  booktitle = {Proceedings of {COLING} 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = dec,
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3390--3400},
  abstract  = {This paper proposes OCR++, an open-source framework designed for a variety of
	information extraction tasks from scholarly articles including metadata (title,
	author names, affiliation and e-mail),
	structure (section headings and body text, table and figure headings, URLs and
	footnotes) and bibliography (citation instances and references).
	We analyze a diverse set of scientific articles written in English to
	understand generic writing patterns and formulate rules to develop this hybrid
	framework.
	Extensive evaluations show that the proposed framework outperforms the existing
	state-of-the-art tools by a large margin in structural information
	extraction along with improved performance in metadata and bibliography
	extraction tasks, both in terms of accuracy (around 50\% improvement) and
	processing time (around 52\% improvement).
	A user experience study conducted with the help of 30 researchers reveals that
	the researchers found this system to be very helpful. As an additional
	objective,
	we discuss two novel use cases including automatically extracting links to
	public datasets from the proceedings, which would further accelerate the
	advancement in digital libraries. The result of the framework can be exported
	as a whole into structured TEI-encoded documents.
	Our framework is accessible online at
	http://www.cnergres.iitkgp.ac.in/OCR++/home/.},
  url       = {http://aclweb.org/anthology/C16-1320}
}

