% Degaetano-Ortlieb and Teich (2017), LaTeCH-CLfL workshop paper (Anthology ID W17-2209).
% Notes for maintainers:
%   - month uses the predefined BibTeX macro (unquoted) so styles can
%     abbreviate or localize it.
%   - the acronym in booktitle is brace-protected so styles that recase
%     the field keep its capitalization.
@InProceedings{degaetanoortlieb-teich:2017:LaTeCH-CLfL,
  author    = {Degaetano-Ortlieb, Stefania and Teich, Elke},
  title     = {Modeling intra-textual variation with entropy and surprisal: topical vs. stylistic patterns},
  booktitle = {Proceedings of the Joint {SIGHUM} Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature},
  month     = aug,
  year      = {2017},
  address   = {Vancouver, Canada},
  publisher = {Association for Computational Linguistics},
  pages     = {68--77},
  abstract  = {We present a data-driven approach to investigate
               intra-textual variation by combining entropy and
               surprisal. With this approach we detect linguistic
               variation based on phrasal lexico-grammatical patterns
               across sections of research articles. Entropy is used
               to detect patterns typical of specific sections.
               Surprisal is used to differentiate between more and
               less informationally-loaded patterns as well as type
               of information (topical vs. stylistic). While we here
               focus on research articles in biology/genetics, the
               methodology is especially interesting for digital
               humanities scholars, as it can be applied to any text
               type or domain and combined with additional variables
               (e.g. time, author or social group).},
  url       = {http://www.aclweb.org/anthology/W17-2209}
}

