% ACL 2017 short paper on discourse segmentation (Anthology ID P17-2037).
@InProceedings{braud-lacroix-sogaard:2017:Short,
  author    = {Braud, Chlo{\'e} and Lacroix, Oph{\'e}lie and S{\o}gaard, Anders},
  title     = {Cross-lingual and cross-domain discourse segmentation of entire documents},
  booktitle = {Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)},
  month     = jul,
  year      = {2017},
  address   = {Vancouver, Canada},
  publisher = {Association for Computational Linguistics},
  pages     = {237--243},
  abstract  = {Discourse segmentation is a crucial step in building end-to-end discourse
	parsers. However, discourse segmenters only exist for a few languages and
	domains. Typically they only detect intra-sentential segment boundaries,
	assuming gold standard sentence and token segmentation, and relying on
	high-quality syntactic parses and rich heuristics that are not generally
	available across languages and domains. In this paper, we propose statistical
	discourse segmenters for five languages and three domains that do not rely on
	gold pre-annotations. We also consider the problem of learning discourse
	segmenters when no labeled data is available for a language. Our fully
	supervised system obtains 89.5\% F1 for English newswire, with slight drops in
	performance on other domains, and we report supervised and unsupervised
	(cross-lingual) results for five languages in total.},
  url       = {http://aclweb.org/anthology/P17-2037}
}

