% ALR12 workshop paper (COLING 2016, Osaka) introducing SAMER, a lexical
% resource for Arabic verbal multiword expressions. The abstract is kept as
% published; only whitespace and LaTeX escaping were normalized.
@InProceedings{albadrashiny-EtAl:2016:ALR12,
  author    = {Al-Badrashiny, Mohamed and Hawwari, Abdelati and Ghoneim, Mahmoud and Diab, Mona},
  title     = {{SAMER}: A Semi-Automatically Created Lexical Resource for {Arabic} Verbal Multiword Expressions Tokens Paradigm and their Morphosyntactic Features},
  booktitle = {Proceedings of the 12th Workshop on {Asian} Language Resources ({ALR12})},
  month     = dec,
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {113--122},
  abstract  = {Although MWE are relatively morphologically and syntactically
               fixed expressions, several types of flexibility can be observed
               in MWE, verbal MWE in particular. Identifying the degree of
               morphological and syntactic flexibility of MWE is very important
               for many Lexicographic and NLP tasks. Adding MWE variants/tokens
               to a dictionary resource requires characterizing the flexibility
               among other morphosyntactic features. Carrying out the task
               manually faces several challenges since it is a very laborious
               task time and effort wise, as well as it will suffer from
               coverage limitation. The problem is exacerbated in rich
               morphological languages where the average word in Arabic could
               have 12 possible inflection forms. Accordingly, in this paper we
               introduce a semi-automatic Arabic multiwords expressions
               resource (SAMER). We propose an automated method that identifies
               the morphological and syntactic flexibility of Arabic Verbal
               Multiword Expressions (AVMWE). All observed morphological
               variants and syntactic pattern alternations of an AVMWE are
               automatically acquired using large scale corpora. We look for
               three morphosyntactic aspects of AVMWE types investigating
               derivational and inflectional variations and syntactic
               templates, namely: 1) inflectional variation (inflectional
               paradigm) and calculating degree of flexibility; 2) derivational
               productivity; and 3) identifying and classifying the different
               syntactic types. We build a comprehensive list of AVMWE. Every
               token in the AVMWE list is lemmatized and tagged with POS
               information. We then search Arabic Gigaword and All ATBs for all
               possible flexible matches. For each AVMWE type we generate: a) a
               statistically ranked list of MWE-lexeme inflections and
               syntactic pattern alternations; b) An abstract syntactic
               template; and c) The most frequent form. Our technique is
               validated using a Golden MWE annotated list. The results shows
               that the quality of the generated resource is 80.04\%.},
  url       = {http://aclweb.org/anthology/W16-5414}
}

