% Tutin & Kraif (2017), workshop paper in the MWE 2017 proceedings (ACL Anthology W17-1724).
@InProceedings{tutin-kraif:2017:MWE2017,
  author    = {Tutin, Agn{\`e}s and Kraif, Olivier},
  title     = {Comparing Recurring Lexico-Syntactic Trees ({RLTs}) and Ngram Techniques for Extended Phraseology Extraction},
  booktitle = {Proceedings of the 13th Workshop on Multiword Expressions ({MWE} 2017)},
  month     = apr,
  year      = {2017},
  address   = {Valencia, Spain},
  publisher = {Association for Computational Linguistics},
  pages     = {176--180},
  abstract  = {This paper aims at assessing to what extent a syntax-based method
               (Recurring Lexico-syntactic Trees (RLT) extraction) allows us to
               extract large phraseological units such as prefabricated routines,
               e.g. "as previously said" or "as far as we/I know" in scientific
               writing. In order to evaluate this method, we compare it to the
               classical ngram extraction technique, on a subset of recurring
               segments including speech verbs in a French corpus of scientific
               writing. Results show that the RLT extraction technique is far more
               efficient for extended MWEs such as routines or collocations but
               performs more poorly for surface phenomena such as syntactic
               constructions or fully frozen expressions.},
  url       = {http://www.aclweb.org/anthology/W17-1724}
}