@Book{CL4LC:2016,
  editor    = {Dominique Brunato  and  Felice Dell'Orletta  and  Giulia Venturi  and  Thomas Fran\c{c}ois  and  Philippe Blache},
  title     = {Proceedings of the Workshop on Computational Linguistics for Linguistic Complexity (CL4LC)},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  url       = {http://aclweb.org/anthology/W16-41}
}

@InProceedings{jimenezlopez-becerrabonache:2016:CL4LC,
  author    = {Jim\'{e}nez-L\'{o}pez, Maria Dolores  and  Becerra-Bonache, Leonor},
  title     = {Could Machine Learning Shed Light on Natural Language Complexity?},
  booktitle = {Proceedings of the Workshop on Computational Linguistics for Linguistic Complexity (CL4LC)},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1--11},
  abstract  = {In this paper, we propose to use a subfield of machine learning --grammatical
	inference-- to measure linguistic complexity from a developmental point of
	view. We focus on relative complexity by considering a child learner in the
	process of first language acquisition. The relevance of grammatical inference
	models for measuring linguistic complexity from a developmental point of view
	is based on the fact that algorithms proposed in this area can be considered
	computational models for studying first language acquisition. Even though it
	would be possible to use different techniques from the field of machine learning
	as computational models for dealing with linguistic complexity --since in any
	model we have algorithms that can learn from data-- we claim that grammatical
	inference models offer some advantages over other tools.},
  url       = {http://aclweb.org/anthology/W16-4101}
}

@InProceedings{chersoni-blache-lenci:2016:CL4LC,
  author    = {Chersoni, Emmanuele  and  Blache, Philippe  and  Lenci, Alessandro},
  title     = {Towards a Distributional Model of Semantic Complexity},
  booktitle = {Proceedings of the Workshop on Computational Linguistics for Linguistic Complexity (CL4LC)},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {12--22},
  abstract  = {In this paper, we introduce for the first time a Distributional Model for
	computing semantic complexity, inspired by the general principles of the
	Memory, Unification and Control framework (Hagoort, 2013; Hagoort, 2016). We
	argue that sentence comprehension is an incremental process driven by the goal
	of constructing a coherent representation of the event represented by the
	sentence. The composition cost of a sentence depends on the semantic coherence
	of the event being constructed and on the activation degree of the linguistic
	constructions. We also report the results of a first evaluation of the model on
	the Bicknell dataset (Bicknell et al., 2010).},
  url       = {http://aclweb.org/anthology/W16-4102}
}

@InProceedings{marcus-EtAl:2016:CL4LC,
  author    = {Str\"{o}bel, Marcus  and  Kerz, Elma  and  Wiechmann, Daniel  and  Neumann, Stella},
  title     = {CoCoGen - Complexity Contour Generator: Automatic Assessment of Linguistic Complexity Using a Sliding-Window Technique},
  booktitle = {Proceedings of the Workshop on Computational Linguistics for Linguistic Complexity (CL4LC)},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {23--31},
  abstract  = {We present a novel approach to the automatic assessment of text complexity
	based on a sliding-window technique that tracks the distribution of complexity
	within a text. This distribution is captured by what we term ``complexity
	contours'' derived from a series of measurements for a given linguistic
	complexity measure. This approach is implemented in an automatic computational
	tool, CoCoGen -- Complexity Contour Generator, which in its current version
	supports 32 indices of linguistic complexity. The goal of the paper is twofold:
	(1) to introduce the design of our computational tool based on a sliding-window
	technique and (2) to showcase this approach in the area of second language (L2)
	learning, i.e. more specifically, in the area of L2 writing.},
  url       = {http://aclweb.org/anthology/W16-4103}
}

@InProceedings{vanschijndel-schuler:2016:CL4LC,
  author    = {van Schijndel, Marten  and  Schuler, William},
  title     = {Addressing surprisal deficiencies in reading time models},
  booktitle = {Proceedings of the Workshop on Computational Linguistics for Linguistic Complexity (CL4LC)},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {32--37},
  abstract  = {This study demonstrates a weakness in how n-gram and PCFG surprisal are used to
	predict reading times in eye-tracking data. In particular, the information
	conveyed by words skipped during saccades is not usually included in the
	surprisal measures. This study shows that correcting the surprisal calculation
	improves n-gram surprisal and that upcoming n-grams affect reading times,
	replicating previous findings of how lexical frequencies affect reading times.
	In contrast, the predictivity of PCFG surprisal does not benefit from the
	surprisal correction despite the fact that lexical sequences skipped by
	saccades are processed by readers, as demonstrated by the corrected n-gram
	measure. These results raise questions about the formulation of
	information-theoretic measures of syntactic processing such as PCFG surprisal
	and entropy reduction when applied to reading times.},
  url       = {http://aclweb.org/anthology/W16-4104}
}

@InProceedings{vajjala-EtAl:2016:CL4LC,
  author    = {Vajjala, Sowmya  and  Meurers, Detmar  and  Eitel, Alexander  and  Scheiter, Katharina},
  title     = {Towards grounding computational linguistic approaches to readability: Modeling reader-text interaction for easy and difficult texts},
  booktitle = {Proceedings of the Workshop on Computational Linguistics for Linguistic Complexity (CL4LC)},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {38--48},
  abstract  = {Computational approaches to readability assessment are generally built and
	evaluated using gold standard corpora labeled by publishers or teachers rather
	than being grounded in observations about human performance. Considering that
	both the reading process and the outcome can be observed, there is an empirical
	wealth that could be used to ground computational analysis of text readability.
	This will also support explicit readability models connecting text complexity
	and the reader's language proficiency to the reading process and outcomes.
	This paper takes a step in this direction by reporting on an experiment to
	study how the relation between text complexity and the reader's language
	proficiency affects the reading process and performance outcomes of readers
	after reading. We modeled the reading process using three eye-tracking
	variables: fixation count, average fixation count, and second-pass reading
	duration. Our models for these variables explained 78.9%, 74% and 67.4% of the
	variance, respectively. Performance outcome was modeled through recall and
	comprehension questions, and these models explained 58.9% and 27.6% of the
	variance, respectively. While the online models give us a better understanding
	of the cognitive correlates of reading with text complexity and
	language proficiency, modeling of the offline measures can be particularly
	relevant for incorporating user aspects into readability models.},
  url       = {http://aclweb.org/anthology/W16-4105}
}

@InProceedings{shain-EtAl:2016:CL4LC,
  author    = {Shain, Cory  and  van Schijndel, Marten  and  Futrell, Richard  and  Gibson, Edward  and  Schuler, William},
  title     = {Memory access during incremental sentence processing causes reading time latency},
  booktitle = {Proceedings of the Workshop on Computational Linguistics for Linguistic Complexity (CL4LC)},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {49--58},
  abstract  = {Studies on the role of memory as a predictor of reading time latencies (1)
	differ in their predictions about when memory effects should occur in
	processing and (2) have had mixed results, with strong positive effects
	emerging from isolated constructed stimuli and weak or even negative effects
	emerging from naturally-occurring stimuli. Our study addresses these concerns
	by comparing several implementations of prominent sentence processing theories
	on an exploratory corpus and evaluating the most successful of these on a
	confirmatory corpus, using a new self-paced reading corpus of seemingly natural
	narratives constructed to contain an unusually high proportion of
	memory-intensive constructions. We show highly significant and complementary
	broad-coverage latency effects both for predictors based on the Dependency
	Locality Theory and for predictors based on a left-corner parsing model of
	sentence processing. Our results indicate that memory access during sentence
	processing does take time, but suggest that stimuli requiring many memory
	access events may be necessary in order to observe the effect.},
  url       = {http://aclweb.org/anthology/W16-4106}
}

@InProceedings{gala-ziegler:2016:CL4LC,
  author    = {Gala, Nuria  and  Ziegler, Johannes},
  title     = {Reducing lexical complexity as a tool to increase text accessibility for children with dyslexia},
  booktitle = {Proceedings of the Workshop on Computational Linguistics for Linguistic Complexity (CL4LC)},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {59--66},
  abstract  = {Lexical complexity plays a central role in readability, particularly for
	dyslexic children and poor readers because of their slow and laborious decoding
	and word recognition skills. Although some features to aid readability may be
	common to most languages (e.g., the majority of 'easy' words are of low
	frequency), we believe that lexical complexity is mainly language-specific. In
	this paper, we define lexical complexity for French and we present a pilot
	study on the effects of text simplification in dyslexic children. The
	participants were asked to read out loud original and manually simplified
	versions of a standardized French text corpus and to answer comprehension
	questions after reading each text. The analysis of the results shows that the
	simplifications performed were beneficial in terms of reading speed and they
	reduced the number of reading errors (mainly lexical ones) without a loss in
	comprehension. Although the number of participants in this study was rather
	small (N=10), the results are
	promising and contribute to the development of applications in computational
	linguistics.},
  url       = {http://aclweb.org/anthology/W16-4107}
}

@InProceedings{delmonte:2016:CL4LC,
  author    = {Delmonte, Rodolfo},
  title     = {Syntactic and Lexical Complexity in Italian Noncanonical Structures},
  booktitle = {Proceedings of the Workshop on Computational Linguistics for Linguistic Complexity (CL4LC)},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {67--78},
  abstract  = {In this paper we will be dealing with different levels of complexity in the
	processing of Italian, a Romance language inheriting many properties from Latin
	which make it an almost free word order language. The paper is concerned with
	syntactic complexity as measurable on the basis of the cognitive parser that
	incrementally builds up a syntactic representation to be used by the semantic
	component. The theoretical framework adopted is LFG, and parsing preferences will be used
	to justify one choice both from a principled and a processing point of view.
	LFG is a transformationless theory in which there is no deep structure separate
	from surface syntactic structure. This is partially in accordance with
	constructional theories in which noncanonical structures containing
	non-argument functions FOCUS/TOPIC are treated as multifunctional constituents.
	Complexity is computed on a processing basis following suggestions made by
	Blache and demonstrated by Kluender and Chesi.},
  url       = {http://aclweb.org/anthology/W16-4108}
}

@InProceedings{shi-li-hu:2016:CL4LC,
  author    = {Shi, Haoyue  and  Li, Caihua  and  Hu, Junfeng},
  title     = {Real Multi-Sense or Pseudo Multi-Sense: An Approach to Improve Word Representation},
  booktitle = {Proceedings of the Workshop on Computational Linguistics for Linguistic Complexity (CL4LC)},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {79--88},
  abstract  = {Previous research has shown that learning multiple representations for
	polysemous words can improve the performance of word embeddings on many tasks.
	However, this leads to another problem. Several vectors of a word may actually
	point to the same meaning, namely pseudo multi-sense. In this paper, we
	introduce the concept of pseudo multi-sense, and then propose an algorithm to
	detect such cases. With the consideration of the detected pseudo multi-sense
	cases, we try to refine the existing word embeddings to eliminate the influence
	of pseudo multi-sense. Moreover, we apply our algorithm to previously released
	multi-sense word embeddings and test it on artificial word similarity tasks
	and the analogy task. The results of the experiments show that diminishing
	pseudo multi-sense can improve the quality of word representations. Thus, our
	method is actually an efficient way to reduce linguistic complexity.},
  url       = {http://aclweb.org/anthology/W16-4109}
}

@InProceedings{gonzalezdios-aranzabe-diazdeilarraza:2016:CL4LC,
  author    = {Gonzalez-Dios, Itziar  and  Aranzabe, Mar\'{i}a Jes\'{u}s  and  D\'{i}az de Ilarraza, Arantza},
  title     = {A Preliminary Study of Statistically Predictive Syntactic Complexity Features and Manual Simplifications in Basque},
  booktitle = {Proceedings of the Workshop on Computational Linguistics for Linguistic Complexity (CL4LC)},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {89--97},
  abstract  = {In this paper, we present a comparative analysis of statistically predictive
	syntactic features of complexity and the treatment of these features by humans
	when simplifying texts. To that end, we have used a list of the five most
	statistically predictive features, obtained automatically, and the Corpus of
	Basque Simplified Texts (CBST) to analyse how the syntactic phenomena in these
	features have been manually simplified. Our aim is to go beyond the
	descriptions of operations found in the corpus and relate the multidisciplinary
	findings to understand text complexity from different points of view. We also
	present some issues that can be important when analysing linguistic complexity.},
  url       = {http://aclweb.org/anthology/W16-4110}
}

@InProceedings{heilmann-neumann:2016:CL4LC,
  author    = {Heilmann, Arndt  and  Neumann, Stella},
  title     = {Dynamic pause assessment of keystroke logged data for the detection of complexity in translation and monolingual text production},
  booktitle = {Proceedings of the Workshop on Computational Linguistics for Linguistic Complexity (CL4LC)},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {98--103},
  abstract  = {Pause analysis of keystroke-logged translations is a hallmark of process-based
	translation studies.
	However, an exact definition of what constitutes a cognitively effortful pause
	during the translation process has not yet been found (Saldanha and O'Brien, 2013).
	This paper investigates the design of a keystroke- and subject-dependent
	identification system of cognitive effort to track complexity in translation
	with keystroke logging (cf. also Dragsted, 2005; Couto-Vale, in
	preparation). It is an elastic measure that takes into account idiosyncratic
	pause duration of translators as well as further confounds such as bi-gram
	frequency, letter frequency and some motor tasks involved in writing. The
	method is
	compared to a common static threshold of 1000 ms in an analysis of cognitive
	effort during the translation of grammatical functions from English to German.
	Additionally, the results are triangulated with eye tracking data for further
	validation. The findings show that at least for smaller sets of data a
	dynamic pause assessment may lead to more accurate results than a generic
	static pause threshold of similar duration.},
  url       = {http://aclweb.org/anthology/W16-4111}
}

@InProceedings{falkenjack-jonsson:2016:CL4LC,
  author    = {Falkenjack, Johan  and  J\"{o}nsson, Arne},
  title     = {Implicit readability ranking using the latent variable of a Bayesian Probit model},
  booktitle = {Proceedings of the Workshop on Computational Linguistics for Linguistic Complexity (CL4LC)},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {104--112},
  abstract  = {Data-driven approaches to readability analysis for languages other than English
	have been plagued by a scarcity of suitable corpora. Often, relevant corpora
	consist only of easy-to-read texts with no rank information or empirical
	readability scores, making only binary approaches, such as classification,
	applicable. We propose a Bayesian latent variable approach to get the most
	out of these kinds of corpora. In this paper we present results on using such a
	model for readability ranking. The model is evaluated on a preliminary corpus
	of ranked student texts with encouraging results. We also assess the model by
	showing that it performs readability classification on par with a state-of-the-art
	classifier while at the same time being transparent enough to allow more
	sophisticated interpretations.},
  url       = {http://aclweb.org/anthology/W16-4112}
}

@InProceedings{chen-meurers:2016:CL4LC,
  author    = {Chen, Xiaobin  and  Meurers, Detmar},
  title     = {CTAP: A Web-Based Tool Supporting Automatic Complexity Analysis},
  booktitle = {Proceedings of the Workshop on Computational Linguistics for Linguistic Complexity (CL4LC)},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {113--119},
  abstract  = {Informed by research on readability and language acquisition, computational
	linguists have developed sophisticated tools for the analysis of linguistic
	complexity. While some tools are starting to become accessible on the web,
	there still is a disconnect between the features that can in principle be
	identified based on state-of-the-art computational linguistic analysis, and the
	analyses a second language acquisition researcher, teacher, or textbook writer
	can readily obtain and visualize for their own collection of texts.
	This short paper presents the development of a web-based tool that aims to meet this
	challenge. The Common Text Analysis Platform (CTAP) is designed to support
	fully configurable linguistic feature extraction for a wide range of complexity
	analyses. It features a user-friendly interface, modularized and reusable
	analysis component integration, and flexible corpus and feature management.
	Building on the Unstructured Information Management framework (UIMA), CTAP
	readily supports integration of state-of-the-art NLP and complexity feature
	extraction while maintaining modularization and reusability. CTAP thereby aims at
	providing a common platform for complexity analysis, encouraging research
	collaboration and sharing of feature extraction components---to jointly advance
	the state-of-the-art in complexity analysis in a form that readily supports
	real-life use by ordinary users.},
  url       = {http://aclweb.org/anthology/W16-4113}
}

@InProceedings{pilan-alfter-volodina:2016:CL4LC,
  author    = {Pil\'{a}n, Ildik\'{o}  and  Alfter, David  and  Volodina, Elena},
  title     = {Coursebook Texts as a Helping Hand for Classifying Linguistic Complexity in Language Learners' Writings},
  booktitle = {Proceedings of the Workshop on Computational Linguistics for Linguistic Complexity (CL4LC)},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {120--126},
  abstract  = {We bring together knowledge from two different types of language learning data,
	texts learners read and texts they write, to improve linguistic complexity
	classification in the latter. Linguistic complexity in the foreign and second
	language learning context can be expressed in terms of proficiency levels. We
	show that incorporating features capturing lexical complexity information from
	reading passages can significantly boost the machine-learning-based
	classification of learner-written texts into proficiency levels. With an F1
	score of .8 our system rivals state-of-the-art results reported for other
	languages for this task. Finally, we present a freely available web-based tool
	for proficiency level classification and lexical complexity visualization for
	both learner writings and reading texts.},
  url       = {http://aclweb.org/anthology/W16-4114}
}

@InProceedings{zaghouani-EtAl:2016:CL4LC,
  author    = {Zaghouani, Wajdi  and  Hawwari, Abdelati  and  Alqahtani, Sawsan  and  Bouamor, Houda  and  Ghoneim, Mahmoud  and  Diab, Mona  and  Oflazer, Kemal},
  title     = {Using Ambiguity Detection to Streamline Linguistic Annotation},
  booktitle = {Proceedings of the Workshop on Computational Linguistics for Linguistic Complexity (CL4LC)},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {127--136},
  abstract  = {Arabic writing is typically underspecified for short vowels and other markups,
	referred to as diacritics. In addition to the lexical ambiguity exhibited in
	most languages, the lack of diacritics in written Arabic adds another layer of
	ambiguity which is an artifact of the orthography. In this paper, we present
	the details of three experimental annotation conditions designed to study the
	impact of automatic ambiguity detection on annotation speed and quality in a
	large-scale annotation project.},
  url       = {http://aclweb.org/anthology/W16-4115}
}

@InProceedings{bjerva-borstell:2016:CL4LC,
  author    = {Bjerva, Johannes  and  B\"{o}rstell, Carl},
  title     = {Morphological Complexity Influences Verb-Object Order in Swedish Sign Language},
  booktitle = {Proceedings of the Workshop on Computational Linguistics for Linguistic Complexity (CL4LC)},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {137--141},
  abstract  = {Computational linguistic approaches to sign languages could benefit from
	investigating how complexity influences structure.
	We investigate whether morphological complexity has an effect on the order of
	Verb (V) and Object (O) in Swedish Sign Language (SSL), on the basis of
	elicited data from five Deaf signers.
	We find a significant difference in the distribution of the orderings OV vs.
	VO, based on an analysis of morphological weight.
	While morphologically heavy verbs exhibit a general preference for OV,
	humanness seems to affect the ordering in the opposite direction, with [+human]
	Objects pushing towards a preference for VO.},
  url       = {http://aclweb.org/anthology/W16-4116}
}

@InProceedings{bentz-EtAl:2016:CL4LC,
  author    = {Bentz, Christian  and  Ruzsics, Tatyana  and  Koplenig, Alexander  and  Samard\v{z}i\'{c}, Tanja},
  title     = {A Comparison Between Morphological Complexity Measures: Typological Data vs. Language Corpora},
  booktitle = {Proceedings of the Workshop on Computational Linguistics for Linguistic Complexity (CL4LC)},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {142--153},
  abstract  = {Language complexity is an intriguing phenomenon argued to play an important
	role in both language learning and processing. The need to compare languages
	with regard to their complexity resulted in a multitude of approaches and
	methods, ranging from accounts targeting specific structural features to global
	quantification of variation more generally. In this paper, we investigate the
	degree to which morphological complexity measures are mutually correlated in a
	sample of more than 500 languages of 101 language families. We use human expert
	judgements from the World Atlas of Language Structures (WALS), and compare them
	to four quantitative measures automatically calculated from language corpora.
	These consist of three previously defined corpus-derived measures, which are
	all monolingual, and one new measure based on automatic word-alignment across
	pairs of languages. We find strong correlations between all the measures,
	illustrating that both expert judgements and automated approaches converge to
	similar complexity ratings, and can be used interchangeably.},
  url       = {http://aclweb.org/anthology/W16-4117}
}

@InProceedings{albertsson-rennes-jonsson:2016:CL4LC,
  author    = {Albertsson, Sarah  and  Rennes, Evelina  and  J\"{o}nsson, Arne},
  title     = {Similarity-Based Alignment of Monolingual Corpora for Text Simplification Purposes},
  booktitle = {Proceedings of the Workshop on Computational Linguistics for Linguistic Complexity (CL4LC)},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {154--163},
  abstract  = {Comparable or parallel corpora are beneficial for many NLP tasks. The automatic
	collection of corpora enables large-scale resources, even for less-resourced
	languages, which in turn can be useful for deducing rules and patterns for text
	rewriting algorithms, a subtask of automatic text simplification.
	 We present two methods for the alignment of Swedish easy-to-read text segments
	to text segments from a reference corpus. The first method (M1) was originally
	developed for the task of text reuse detection, measuring sentence similarity
	by a modified version of a TF-IDF vector space model. A second method (M2),
	also accounting for part-of-speech tags, was developed, and the methods were
	compared.
	For evaluation, a crowdsourcing platform was built for human judgement data
	collection, and preliminary results showed that cosine similarity relates
	better to human ranks than the Dice coefficient. We also saw a tendency that
	including syntactic context in the TF-IDF vector space model is beneficial for
	this kind of paraphrase alignment task.},
  url       = {http://aclweb.org/anthology/W16-4118}
}

@InProceedings{wagnerfilho-wilkens-villavicencio:2016:CL4LC,
  author    = {Wagner Filho, Jorge Alberto  and  Wilkens, Rodrigo  and  Villavicencio, Aline},
  title     = {Automatic Construction of Large Readability Corpora},
  booktitle = {Proceedings of the Workshop on Computational Linguistics for Linguistic Complexity (CL4LC)},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {164--173},
  abstract  = {This work presents a framework for the automatic construction of large Web
	corpora classified by readability level. We compare different Machine Learning
	classifiers for the task of readability assessment focusing on Portuguese and
	English texts, analysing the impact of variables like the feature inventory
	used in the resulting corpus. In a comparison between shallow and deeper
	features, the former already produce F-measures of over 0.75 for Portuguese
	texts, but the use of additional features yields even better results in
	most cases. For English, shallow features also perform well, as do classic
	readability formulas. Comparing different classifiers for the task, logistic
	regression obtained, in general, the best results, but with considerable
	differences between the results for two and those for three classes,
	especially regarding the intermediary class. Given the large scale of the
	resulting corpus, for evaluation we adopt the agreement between different
	classifiers as an indication of readability assessment certainty. As a result
	of this work, a large corpus for Brazilian Portuguese was built, including 1.7
	million documents and about 1.6 billion tokens, already parsed and annotated
	with 134 different textual attributes, along with the agreement among the
	various classifiers.},
  url       = {http://aclweb.org/anthology/W16-4119}
}

@InProceedings{bloem:2016:CL4LC,
  author    = {Bloem, Jelke},
  title     = {Testing the Processing Hypothesis of word order variation using a probabilistic language model},
  booktitle = {Proceedings of the Workshop on Computational Linguistics for Linguistic Complexity (CL4LC)},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {174--185},
  abstract  = {This work investigates the application of a measure of surprisal to modeling a
	grammatical variation phenomenon between near-synonymous constructions. We
	investigate a particular variation phenomenon, word order variation in Dutch
	two-verb clusters, where it has been established that word order choice is
	affected by processing cost. Several multifactorial corpus studies of Dutch
	verb clusters have used other measures of processing complexity to show that
	this factor affects word order choice. This previous work allows us to compare
	the surprisal measure, which is based on constraint satisfaction theories of
	language modeling, to those previously used measures, which are more directly
	linked to empirical observations of processing complexity. Our results show
	that surprisal does not predict the word order choice by itself, but is a
	significant predictor when used in a measure of uniform information density
	(UID). This lends support to the view that human language processing is
	facilitated not so much by predictable sequences of words but more by sequences
	of words in which information is spread evenly.},
  url       = {http://aclweb.org/anthology/W16-4120}
}

@InProceedings{li-EtAl:2016:CL4LC,
  author    = {Li, Jixing  and  Brennan, Jonathan  and  Mahar, Adam  and  Hale, John},
  title     = {Temporal Lobes as Combinatory Engines for both Form and Meaning},
  booktitle = {Proceedings of the Workshop on Computational Linguistics for Linguistic Complexity (CL4LC)},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {186--191},
  abstract  = {The relative contributions of meaning and form to sentence processing remain
	an outstanding issue across the language sciences. We examine this issue by
	formalizing four incremental complexity metrics and comparing them against
	freely-available ROI timecourses. Syntax-related metrics based on top-down
	parsing and structural dependency-distance turn out to significantly improve a
	regression model, compared to a simpler model that formalizes only conceptual
	combination using a distributional vector-space model. This confirms the view
	of the anterior temporal lobes as combinatory engines that deal in both form
	(see e.g. Brennan et al., 2012; Mazoyer, 1993) and meaning (see e.g., Patterson
	et al., 2007). This same characterization applies to a posterior temporal
	region in roughly ``Wernicke's Area.''},
  url       = {http://aclweb.org/anthology/W16-4121}
}

@InProceedings{mirzaei-meshgi-kawahara:2016:CL4LC,
  author    = {Mirzaei, Maryam Sadat  and  Meshgi, Kourosh  and  Kawahara, Tatsuya},
  title     = {Automatic Speech Recognition Errors as a Predictor of L2 Listening Difficulties},
  booktitle = {Proceedings of the Workshop on Computational Linguistics for Linguistic Complexity (CL4LC)},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {192--201},
  abstract  = {This paper investigates the use of automatic speech recognition (ASR) errors as
	indicators of the second language (L2) learners' listening difficulties and in
	doing so strives to overcome the shortcomings of Partial and Synchronized
	Caption (PSC) system. PSC is a system that generates a partial caption
	including difficult words detected based on high speech rate, low frequency,
	and specificity. To improve the choice of words in this system, and explore a
	better method to detect speech challenges, ASR errors were investigated as a
	model of the L2 listener, hypothesizing that some of these errors are similar
	to those of language learners when transcribing the videos. To investigate
	this hypothesis, ASR errors in transcription of several TED talks were analyzed
	and compared with PSC's selected words. Both the overlapping and mismatching
	cases were analyzed to investigate possible improvement for the PSC system.
	Those ASR errors that were not detected by PSC as cases of learners'
	difficulties were further analyzed and classified into four categories:
	homophones, minimal pairs, breached boundaries and negatives. These errors were
	embedded into the baseline PSC to make the enhanced version and were evaluated
	in an experiment with L2 learners. The results indicated that the enhanced
	version, which encompasses the ASR errors, addresses most of the L2 learners'
	difficulties and better assists them in comprehending challenging video
	segments as compared with the baseline.},
  url       = {http://aclweb.org/anthology/W16-4122}
}

@InProceedings{singh-EtAl:2016:CL4LC,
  author    = {Singh, Abhinav Deep  and  Mehta, Poojan  and  Husain, Samar  and  Rajkumar, Rajakrishnan},
  title     = {Quantifying sentence complexity based on eye-tracking measures},
  booktitle = {Proceedings of the Workshop on Computational Linguistics for Linguistic Complexity (CL4LC)},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {202--212},
  abstract  = {Eye-tracking reading times have been attested to reflect cognitive processes
	underlying sentence comprehension. However, the use of reading times in NLP
	applications is an underexplored area of research. In this initial work we
	build an automatic system to assess sentence complexity using automatically
	predicted eye-tracking reading time measures and demonstrate the efficacy of
	these reading times for a well-known NLP task, namely, readability assessment.
	We use a machine learning model and a set of features known to be significant
	predictors of reading times in order to learn per-word reading times from a
	corpus of English text having reading times of human readers. Subsequently, we
	use the model to predict reading times for novel text in the context of the
	aforementioned task. A model based only on reading times gave competitive
	results compared to the systems that use extensive syntactic features to
	compute linguistic complexity. Our work, to the best of our knowledge, is the
	first study to show that automatically predicted reading times can successfully
	model the difficulty of a text and can be deployed in practical text processing
	applications.},
  url       = {http://aclweb.org/anthology/W16-4123}
}

@InProceedings{takahira-tanakaishii-dbowski:2016:CL4LC,
  author    = {Takahira, Ryosuke  and  Tanaka-Ishii, Kumiko  and  D\k{e}bowski, \L{}ukasz},
  title     = {Upper Bound of Entropy Rate Revisited ---A New Extrapolation of Compressed Large-Scale Corpora---},
  booktitle = {Proceedings of the Workshop on Computational Linguistics for Linguistic Complexity (CL4LC)},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {213--221},
  abstract  = {The article presents results of entropy rate estimation for six human
	languages, using large, state-of-the-art corpora of up to 7.8
	gigabytes. To obtain the estimates for data length tending to infinity, we use
	an extrapolation function given by an ansatz. Whereas some ansatzes of this
	kind were proposed in previous research papers, here we introduce a stretched
	exponential extrapolation function that has a smaller error of fit. In this
	way, we uncover the possibility that the entropy rates of human languages are
	positive but 20% smaller than previously reported.},
  url       = {http://aclweb.org/anthology/W16-4124}
}

@InProceedings{bentz-berdicevskis:2016:CL4LC,
  author    = {Bentz, Christian  and  Berdicevskis, Aleksandrs},
  title     = {Learning pressures reduce morphological complexity: Linking corpus, computational and experimental evidence},
  booktitle = {Proceedings of the Workshop on Computational Linguistics for Linguistic Complexity (CL4LC)},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {222--232},
  abstract  = {The morphological complexity of languages differs widely and changes over time.
	Pathways of change are often driven by the interplay of multiple competing
	factors, and are hard to disentangle. We here focus on a paradigmatic scenario
	of language change: the reduction of morphological complexity from Latin
	towards the Romance languages. To establish a causal explanation for this
	phenomenon, we employ three lines of evidence: 1) analyses of parallel corpora
	to measure the complexity of words in actual language production, 2)
	applications of NLP tools to further tease apart the contribution of
	inflectional morphology to word complexity, and 3) experimental data from
	artificial language learning, which illustrate the learning pressures at play
	when morphology simplifies. These three lines of evidence converge to show that
	pressures associated with imperfect language learning are good candidates to
	causally explain the reduction in morphological complexity in the
	Latin-to-Romance scenario. More generally, we argue that combining corpus,
	computational and experimental evidence is the way forward in historical
	linguistics and linguistic typology.},
  url       = {http://aclweb.org/anthology/W16-4125}
}

