% EMNLP 2017 conference paper on page-level structure labeling of printed books.
% The month is given as the predefined macro (sep) so that the bibliography
% style decides how to render it; the abstract field is informational only.
@InProceedings{mcconnaughey-dai-bamman:2017:EMNLP2017,
  author    = {McConnaughey, Lara and Dai, Jennifer and Bamman, David},
  title     = {The Labeled Segmentation of Printed Books},
  booktitle = {Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing},
  month     = sep,
  year      = {2017},
  address   = {Copenhagen, Denmark},
  publisher = {Association for Computational Linguistics},
  pages     = {737--747},
  abstract  = {We introduce the task of book structure labeling: segmenting and assigning a
    fixed category (such as Table of Contents, Preface, Index) to the document
    structure of printed books.  We manually annotate the page-level structural
    categories for a large dataset totaling 294,816 pages in 1,055 books evenly
    sampled from 1750--1922, and present empirical results comparing the performance
    of several classes of models.  The best-performing model, a bidirectional LSTM
    with rich features, achieves an overall accuracy of 95.8 and a class-balanced
    macro F-score of 71.4.},
  url       = {https://www.aclweb.org/anthology/D17-1077},
}

