% Sirts, Piguet and Johnson: conference paper in the CoNLL 2017 proceedings, pp. 322--332.
% Braced words in title/booktitle keep their capitalisation under styles that recase
% titles; month uses the predefined three-letter macro so the style controls its rendering.
@InProceedings{sirts-piguet-johnson:2017:CoNLL,
  author    = {Sirts, Kairit and Piguet, Olivier and Johnson, Mark},
  title     = {Idea density for predicting {Alzheimer's} disease from transcribed speech},
  booktitle = {Proceedings of the 21st Conference on Computational Natural Language Learning ({CoNLL} 2017)},
  month     = aug,
  year      = {2017},
  address   = {Vancouver, Canada},
  publisher = {Association for Computational Linguistics},
  pages     = {322--332},
  abstract  = {Idea Density (ID) measures the rate at which ideas or elementary predications
	are expressed in an utterance or in a text.
	Lower ID is found to be associated with an increased risk of developing
	Alzheimer's disease (AD) (Snowdon et al., 1996; Engelman et al., 2010).
	ID has been used in two different versions: propositional idea density (PID)
	counts the expressed ideas and can be applied to any text while semantic idea
	density (SID) counts pre-defined information content units and is naturally
	more applicable to normative domains, such as picture description tasks.
	In this paper, we develop DEPID, a novel dependency-based method for computing
	PID, and its version DEPID-R that enables to exclude repeating ideas---a
	feature characteristic to AD speech.  We conduct the first comparison of
	automatically extracted PID and SID in the diagnostic classification task on
	two different AD datasets covering both closed-topic and free-recall domains.
	While SID performs better on the normative dataset, adding PID leads to a small
	but significant improvement (+1.7 F-score). On the free-topic dataset, PID
	performs better than SID as expected (77.6 vs 72.3 in F-score) but adding the
	features derived from the word embedding clustering underlying the automatic
	SID increases the results considerably, leading to an F-score of 84.8.},
  url       = {http://aclweb.org/anthology/K17-1033}
}

