@Book{COLING:2016,
  editor    = {Yuji Matsumoto  and  Rashmi Prasad},
  title     = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  url       = {http://aclweb.org/anthology/C16-1}
}

@InProceedings{bawden-crabbe:2016:COLING,
  author    = {Bawden, Rachel  and  Crabb\'{e}, Beno\^{i}t},
  title     = {Boosting for Efficient Model Selection for Syntactic Parsing},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1--11},
  abstract  = {We present an efficient model selection method using boosting for
	transition-based constituency parsing. It is designed for exploring a
	high-dimensional search space, defined by a large set of feature
	templates, as for example is typically the case when parsing morphologically
	rich languages. Our method removes the need to manually define heuristic
	constraints, which are often imposed in current state-of-the-art
	selection methods. Our experiments for French show that the method is more
	efficient and is also capable of producing compact, state-of-the-art
	models.},
  url       = {http://aclweb.org/anthology/C16-1001}
}

@InProceedings{guo-EtAl:2016:COLING1,
  author    = {Guo, Jiang  and  Che, Wanxiang  and  Wang, Haifeng  and  Liu, Ting},
  title     = {A Universal Framework for Inductive Transfer Parsing across Multi-typed Treebanks},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {12--22},
  abstract  = {Various treebanks have been released for dependency parsing.
	Although treebanks may belong to different languages or have different
	annotation schemes, they contain common syntactic knowledge that has the
	potential to benefit each other.
	This paper presents a universal framework for transfer parsing across
	multi-typed treebanks with deep multi-task learning.
	We consider two kinds of treebanks as source: the multilingual universal
	treebanks and the monolingual heterogeneous treebanks.
	Knowledge across the source and target treebanks is effectively transferred
	through multi-level parameter sharing.
	Experiments on several benchmark datasets in various languages demonstrate that
	our approach can make effective use of arbitrary source treebanks to improve
	target parsing models.},
  url       = {http://aclweb.org/anthology/C16-1002}
}

@InProceedings{pate-johnson:2016:COLING,
  author    = {Pate, John K  and  Johnson, Mark},
  title     = {Grammar induction from (lots of) words alone},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {23--32},
  abstract  = {Grammar induction is the task of learning syntactic structure in a setting
	where that structure is hidden. Grammar induction from words alone is
	interesting because it is similar to the problem that a child learning a
	language faces. Previous work has typically assumed richer but cognitively
	implausible input, such as POS tag annotated data, which makes that work less
	relevant to human language acquisition. We show that grammar induction from
	words alone is in fact feasible when the model is provided with sufficient
	training data, and present two new streaming or mini-batch algorithms for PCFG
	inference that can learn from millions of words of training data. We compare
	the performance of these algorithms to a batch algorithm that learns from less
	data. The minibatch algorithms outperform the batch algorithm, showing that
	cheap inference with more data is better than intensive inference with less
	data. Additionally, we show that the harmonic initialiser, which previous work
	identified as essential when learning from small POS-tag annotated corpora
	(Klein and Manning, 2004), is not superior to a uniform initialisation.},
  url       = {http://aclweb.org/anthology/C16-1003}
}

@InProceedings{ren-EtAl:2016:COLING,
  author    = {Ren, Pengjie  and  Wei, Furu  and  Chen, Zhumin  and  Ma, Jun  and  Zhou, Ming},
  title     = {A Redundancy-Aware Sentence Regression Framework for Extractive Summarization},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {33--43},
  abstract  = {Existing sentence regression methods for extractive summarization usually model
	sentence importance and redundancy in two separate processes. They first
	evaluate the importance f(s) of each sentence s and then select sentences to
	generate a summary based on both the importance scores and redundancy among
	sentences. In this paper, we propose to model importance and redundancy
	simultaneously by directly evaluating the relative importance f(s|S) of a
	sentence s given a set of selected sentences S. Specifically, we present a new
	framework to conduct regression with respect to the relative gain of s given S
	calculated by the ROUGE metric. Besides the single sentence features,
	additional features derived from the sentence relations are incorporated.
	Experiments on the DUC 2001, 2002 and 2004 multi-document summarization
	datasets show that the proposed method outperforms state-of-the-art extractive
	summarization approaches.},
  url       = {http://aclweb.org/anthology/C16-1004}
}

@InProceedings{laokulrat-EtAl:2016:COLING,
  author    = {Laokulrat, Natsuda  and  Phan, Sang  and  Nishida, Noriki  and  Shu, Raphael  and  Ehara, Yo  and  Okazaki, Naoaki  and  Miyao, Yusuke  and  Nakayama, Hideki},
  title     = {Generating Video Description using Sequence-to-sequence Model with Temporal Attention},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {44--52},
  abstract  = {Automatic video description generation has recently been getting attention
	after rapid advancements in image caption generation. Automatically generating a
	description for a video is more challenging than for an image due to the
	temporal dynamics of its frames. Most previous work relied on Recurrent Neural
	Networks (RNNs), and recently attention mechanisms have also been applied to
	make the model learn to focus on some frames of the video while generating each
	word of the describing sentence.
	In this paper, we focus on a sequence-to-sequence approach with a temporal
	attention mechanism. We analyze and compare the results from different
	attention model configurations. By applying the temporal attention mechanism to
	the system, we can achieve a METEOR score of 0.310 on Microsoft Video
	Description dataset, which outperformed the state-of-the-art system so far.},
  url       = {http://aclweb.org/anthology/C16-1005}
}

@InProceedings{luo-liu-litman:2016:COLING,
  author    = {Luo, Wencan  and  Liu, Fei  and  Litman, Diane},
  title     = {An Improved Phrase-based Approach to Annotating and Summarizing Student Course Responses},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {53--63},
  abstract  = {Teaching large classes remains a great challenge, primarily because it is
	difficult to attend to all the student needs in a timely manner. 
	Automatic text summarization systems can be leveraged to summarize the student
	feedback, submitted immediately after each lecture, but what makes a good
	summary of student responses remains to be discovered.
	In this work we explore a new methodology that effectively extracts summary
	phrases from the student responses.
	Each phrase is tagged with the number of students who raise the issue. 
	The phrases are evaluated along two dimensions: with respect to text content,
	they should be informative and well-formed, measured by the ROUGE metric;
	additionally, they shall attend to the most pressing student needs, measured by
	a newly proposed metric. 
	This work is enabled by a phrase-based annotation and highlighting scheme,
	which is new to the summarization task.
	The phrase-based framework allows us to summarize the student responses into a
	set of bullet points and present them to the instructor promptly.},
  url       = {http://aclweb.org/anthology/C16-1006}
}

@InProceedings{mirza-tonelli:2016:COLING1,
  author    = {Mirza, Paramita  and  Tonelli, Sara},
  title     = {CATENA: CAusal and TEmporal relation extraction from NAtural language texts},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {64--75},
  abstract  = {We present CATENA, a sieve-based system to perform temporal and causal relation
	extraction and classification from English texts, exploiting the interaction
	between the temporal and the causal model. We evaluate the performance of each
	sieve, showing that the rule-based, the machine-learned and the reasoning
	components all contribute to achieving state-of-the-art performance on
	TempEval-3 and TimeBank-Dense data. Although causal relations are much sparser
	than temporal ones, the architecture and the selected features are mostly
	suitable to serve both tasks. The effects of the interaction between the
	temporal and the causal components, although limited, yield promising results
	and confirm the tight connection between the temporal and the causal dimension
	of texts.},
  url       = {http://aclweb.org/anthology/C16-1007}
}

@InProceedings{iso-wakamiya-aramaki:2016:COLING,
  author    = {Iso, Hayate  and  Wakamiya, Shoko  and  Aramaki, Eiji},
  title     = {Forecasting Word Model: Twitter-based Influenza Surveillance and Prediction},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {76--86},
  abstract  = {Because of the increasing popularity of social media, much information has been
	shared on the internet, enabling social media users to understand various real
	world events. Particularly, social media-based infectious disease surveillance
	has attracted increasing attention. In this work, we specifically examine
	influenza: a common topic of communication on social media. The fundamental
	theory of this work is that several words, such as symptom words (fever,
	headache, etc.), appear in advance of flu epidemic occurrence. Consequently,
	past word occurrence can contribute to estimation of the number of current
	patients. To employ such forecasting words, one can first estimate the optimal
	time lag for each word based on their cross correlation. Then one can build a
	linear model consisting of word frequencies at different time points for
	nowcasting and for forecasting influenza epidemics. In experiments using 7.7
	million tweets from August 2012 to January 2016, the proposed model achieved
	the best nowcasting performance to date (correlation
	ratio 0.93) and practically sufficient forecasting performance (correlation
	ratio 0.91 in 1-week future prediction, and correlation ratio 0.77 in 3-week
	future prediction). This report is the first of the relevant literature to
	describe a model enabling prediction of future epidemics using Twitter.},
  url       = {http://aclweb.org/anthology/C16-1008}
}

@InProceedings{reimers-beyer-gurevych:2016:COLING,
  author    = {Reimers, Nils  and  Beyer, Philip  and  Gurevych, Iryna},
  title     = {Task-Oriented Intrinsic Evaluation of Semantic Textual Similarity},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {87--96},
  abstract  = {Semantic Textual Similarity (STS) is a foundational NLP task and can be used in
	a wide range of tasks. Hundreds of different STS systems exist for determining
	the STS of two texts; however, for an NLP system designer, it is hard to decide
	which system is the best one. To answer this question, an intrinsic evaluation
	of the STS systems is conducted by comparing the output of the system to human
	judgments on semantic similarity. The comparison is usually done using Pearson
	correlation. In this work, we show that relying on intrinsic evaluations with
	Pearson correlation can be misleading. In three common STS based tasks we could
	observe that the Pearson correlation was especially ill-suited to detect the
	best STS system for the task and other evaluation measures were much better
	suited. In this work we define how the validity of an intrinsic evaluation can
	be assessed and compare different intrinsic evaluation methods. Understanding
	of the properties of the targeted task is crucial and we propose a framework
	for conducting the intrinsic evaluation which takes the properties of the
	targeted task into account.},
  url       = {http://aclweb.org/anthology/C16-1009}
}

@InProceedings{arcan-mccrae-buitelaar:2016:COLING,
  author    = {Arcan, Mihael  and  McCrae, John Philip  and  Buitelaar, Paul},
  title     = {Expanding wordnets to new languages with multilingual sense disambiguation},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {97--108},
  abstract  = {Princeton WordNet is one of the most important resources for natural language
	processing, but is only available for English. While it has been translated
	using the expand approach to many other languages, this is an expensive manual
	process. Therefore it would be beneficial to have a high-quality automatic
	translation approach that would support NLP techniques which rely on WordNet
	in new languages. The translation of wordnets is fundamentally complex because
	of the need to translate all senses of a word including low frequency senses,
	which is very challenging for current machine translation approaches. For this
	reason we leverage existing translations of WordNet in other languages to
	identify contextual information for wordnet senses from a large set of generic
	parallel corpora. We evaluate our approach using 10 translated wordnets for
	European languages. Our experiment shows a significant improvement over
	translation without any contextual information. Furthermore, we evaluate how
	the choice of pivot languages affects performance of multilingual word sense
	disambiguation.},
  url       = {http://aclweb.org/anthology/C16-1010}
}

@InProceedings{saha-EtAl:2016:COLING,
  author    = {Saha, Amrita  and  Khapra, Mitesh M.  and  Chandar, Sarath  and  Rajendran, Janarthanan  and  Cho, Kyunghyun},
  title     = {A Correlational Encoder Decoder Architecture for Pivot Based Sequence Generation},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {109--118},
  abstract  = {Interlingua based Machine Translation (MT) aims to encode multiple languages
	into a common linguistic representation and then decode sentences in multiple
	target languages from this representation. In this work we explore this idea in
	the context of neural encoder decoder architectures, albeit on a smaller scale
	and without MT as the end goal. Specifically, we consider the case of three
	languages or modalities X, Z and Y wherein we are interested in generating
	sequences in Y starting from information available in X. However, there is no
	parallel training data available between X and Y but, training data is
	available between X \& Z and Z \& Y (as is often the case in many real world
	applications). Z thus acts as a pivot/bridge. An obvious solution, which is
	perhaps less elegant but works very well in practice, is to train a two-stage
	model which first converts from X to Z and then from Z to Y. Instead we explore
	an interlingua inspired solution which jointly learns to do the following (i)
	encode X and Z to a common representation and (ii) decode Y from this common
	representation. We evaluate our model on two tasks: (i) bridge transliteration
	and (ii) bridge captioning. We report promising results in both these
	applications and believe that this is a step in the right direction towards
	truly interlingua-inspired encoder-decoder architectures.},
  url       = {http://aclweb.org/anthology/C16-1011}
}

@InProceedings{aufrant-wisniewski-yvon:2016:COLING,
  author    = {Aufrant, Lauriane  and  Wisniewski, Guillaume  and  Yvon, Fran\c{c}ois},
  title     = {Zero-resource Dependency Parsing: Boosting Delexicalized Cross-lingual Transfer with Linguistic Knowledge},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {119--130},
  abstract  = {This paper studies cross-lingual transfer for dependency parsing, focusing on
	very low-resource settings where delexicalized transfer is the only fully
	automatic option. We show how to boost parsing performance by rewriting the
	source sentences so as to better match the linguistic regularities of the
	target language. We contrast a data-driven approach with an approach relying on
	linguistically motivated rules automatically extracted from the World Atlas of
	Language Structures. Our findings are backed up by experiments involving 40
	languages. They show that both approaches greatly outperform the baseline, the
	knowledge-driven method yielding the best accuracies, with average improvements
	of +2.9 UAS, and up to +90 UAS (absolute) on some frequent PoS configurations.},
  url       = {http://aclweb.org/anthology/C16-1012}
}

@InProceedings{bollmann-sogaard:2016:COLING,
  author    = {Bollmann, Marcel  and  S{\o}gaard, Anders},
  title     = {Improving historical spelling normalization with bi-directional LSTMs and multi-task learning},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {131--139},
  abstract  = {Natural-language processing of historical documents is complicated by the
	abundance of variant spellings and lack of annotated data. A common approach is
	to normalize the spelling of historical words to modern forms. We explore the
	suitability of a deep neural network architecture for this task, particularly a
	deep bi-LSTM network applied on a character level. Our model compares well to
	previously established normalization algorithms when evaluated on a diverse set
	of texts from Early New High German. We show that multi-task learning with
	additional normalization data can improve our model's performance further.},
  url       = {http://aclweb.org/anthology/C16-1013}
}

@InProceedings{ren-zhang:2016:COLING,
  author    = {Ren, Yafeng  and  Zhang, Yue},
  title     = {Deceptive Opinion Spam Detection Using Neural Network},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {140--150},
  abstract  = {Deceptive opinion spam detection has attracted significant attention from both
	business and research communities. Existing approaches are based on manually
	designed discrete features, which can capture linguistic and psychological cues.
	However, such features fail to encode the semantic meaning of a document from
	the discourse perspective, which limits the performance. In this paper, we
	empirically explore a neural network model to learn document-level
	representation for detecting deceptive opinion spam. In particular, given a
	document, the model learns sentence representations with a convolutional neural
	network, which are combined using a gated recurrent neural network with
	attention mechanism to model discourse information and yield a document vector.
	Finally, the document representation is used directly as features to identify
	deceptive opinion spam. Experimental results on three domains (Hotel,
	Restaurant, and Doctor) show that our proposed method outperforms
	state-of-the-art methods.},
  url       = {http://aclweb.org/anthology/C16-1014}
}

@InProceedings{li-EtAl:2016:COLING1,
  author    = {Li, Ximing  and  Chi, Jinjin  and  Li, Changchun  and  Ouyang, Jihong  and  Fu, Bo},
  title     = {Integrating Topic Modeling with Word Embeddings by Mixtures of vMFs},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {151--160},
  abstract  = {Gaussian LDA integrates topic modeling with word embeddings by replacing
	discrete topic distribution over word types with multivariate Gaussian
	distribution on the embedding space. This can take semantic information of
	words into account. However, the Euclidean similarity used in Gaussian topics
	is not an optimal semantic measure for word embeddings. It is acknowledged
	that the cosine similarity better describes the semantic relatedness between word
	embeddings. To employ the cosine measure and capture complex topic structure,
	we use von Mises-Fisher (vMF) mixture models to represent topics, and then
	develop a novel mix-vMF topic model (MvTM). Using public pre-trained word
	embeddings, we evaluate MvTM on three real-world data sets. Experimental
	results show that our model can discover more coherent topics than the
	state-of-the-art baseline models, and achieve competitive classification
	performance.},
  url       = {http://aclweb.org/anthology/C16-1015}
}

@InProceedings{takeda-komatani:2016:COLING,
  author    = {Takeda, Ryu  and  Komatani, Kazunori},
  title     = {Bayesian Language Model based on Mixture of Segmental Contexts for Spontaneous Utterances with Unexpected Words},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {161--170},
  abstract  = {This paper describes a Bayesian language model for predicting spontaneous
	utterances. 
	People sometimes say unexpected words, such as fillers or hesitations, 
	that cause the misprediction of words in normal N-gram models. 
	Our proposed model considers mixtures of possible segmental contexts, 
	that is, a kind of context-word selection. 
	It can reduce negative effects caused by unexpected words 
	because it represents conditional occurrence probabilities of a word as
	weighted mixtures of possible segmental contexts.
	The tuning of mixture weights is the key issue in this approach because the
	segment patterns become numerous; we resolve this by using a Bayesian model. 
	The generative process is achieved by combining the stick-breaking process 
	and the process used in the variable order Pitman-Yor language model. 
	Experimental evaluations revealed that our model outperformed contiguous N-gram
	models in terms of perplexity for noisy text including hesitations.},
  url       = {http://aclweb.org/anthology/C16-1016}
}

@InProceedings{ma-cambria-gao:2016:COLING,
  author    = {Ma, Yukun  and  Cambria, Erik  and  Gao, SA},
  title     = {Label Embedding for Zero-shot Fine-grained Named Entity Typing},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {171--180},
  abstract  = {Named entity typing is the task of detecting the types of a named entity in
	context. For instance, given "Eric is giving a presentation", our goal is to
	infer that `Eric' is a speaker or a presenter and a person. Existing approaches
	to named entity typing cannot work with a growing type set and fail to
	recognize entity mentions of unseen types. In this paper, we present a label
	embedding method that incorporates prototypical and hierarchical information to
	learn pre-trained label embeddings. In addition, we adapt a zero-shot learning
	framework that can predict both seen and previously unseen entity types. We
	perform evaluation on three benchmark datasets with two settings: 1) few-shot
	recognition, where all types are covered by the training set; and 2) zero-shot
	recognition, where fine-grained types are assumed absent from the training set.
	Results show that prior knowledge encoded using our label embedding methods can
	significantly boost the performance of classification for both cases.},
  url       = {http://aclweb.org/anthology/C16-1017}
}

@InProceedings{shen-EtAl:2016:COLING1,
  author    = {Shen, Qinlan  and  Clothiaux, Daniel  and  Tagtow, Emily  and  Littell, Patrick  and  Dyer, Chris},
  title     = {The Role of Context in Neural Morphological Disambiguation},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {181--191},
  abstract  = {Languages with rich morphology often introduce sparsity in language processing
	tasks. While morphological analyzers can reduce this sparsity by providing
	morpheme-level analyses for words, they will often introduce ambiguity by
	returning multiple analyses for the same surface form. The problem of
	disambiguating between these morphological parses is further complicated by the
	fact that the correct parse for a word depends not only on the surface
	form but also on other words in its context. In this paper, we present a
	language-agnostic approach to morphological disambiguation. We address the
	problem of using context in morphological disambiguation by presenting several
	LSTM-based neural architectures that encode long-range surface-level and
	analysis-level contextual dependencies. We applied our approach to Turkish,
	Russian, and Arabic to compare effectiveness across languages, matching
	state-of-the-art results in two of the three languages. Our results also
	demonstrate that while context plays a role in learning how to disambiguate,
	the type and amount of context needed varies between languages.},
  url       = {http://aclweb.org/anthology/C16-1018}
}

@InProceedings{sun:2016:COLING,
  author    = {Sun, Xu},
  title     = {Asynchronous Parallel Learning for Neural Networks and Structured Models with Dense Features},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {192--202},
  abstract  = {Existing asynchronous parallel learning methods are only for the sparse feature
	models, and they face new challenges for the dense feature models like neural
	networks (e.g., LSTM, RNN). The problem for dense features is that asynchronous
	parallel learning brings gradient errors derived from overwrite actions. We
	show that gradient errors are very common and inevitable. Nevertheless, our
	theoretical analysis shows that the learning process with gradient errors can
	still be convergent towards the optimum of objective functions for many
	practical applications. Thus, we propose a simple method \emph{AsynGrad} for
	asynchronous parallel learning with gradient error. Based on various dense
	feature models (LSTM, dense-CRF) and various NLP tasks, experiments show that
	\emph{AsynGrad} achieves a substantial improvement in training speed without
	any loss in accuracy.},
  url       = {http://aclweb.org/anthology/C16-1019}
}

@InProceedings{wu-zhang-zong:2016:COLING,
  author    = {Wu, Huijia  and  Zhang, Jiajun  and  Zong, Chengqing},
  title     = {An Empirical Exploration of Skip Connections for Sequential Tagging},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {203--212},
  abstract  = {In this paper, we empirically explore the effects of various kinds of skip
	connections in stacked bidirectional LSTMs for sequential tagging. We
	investigate three kinds of skip connections connecting to LSTM cells: (a) skip
	connections to the gates, (b) skip connections to the internal states and (c)
	skip connections to the cell outputs. We present comprehensive experiments
	showing that skip connections to cell outputs outperform the remaining two.
	Furthermore, we observe that using gated identity functions as skip mappings
	works well. Based on these novel skip connections, we successfully train
	deep stacked bidirectional LSTM models and obtain state-of-the-art results on
	CCG supertagging and comparable results on POS tagging.},
  url       = {http://aclweb.org/anthology/C16-1020}
}

@InProceedings{wang-EtAl:2016:COLING1,
  author    = {Wang, Xun  and  Nishino, Masaaki  and  Hirao, Tsutomu  and  Sudoh, Katsuhito  and  Nagata, Masaaki},
  title     = {Exploring Text Links for Coherent Multi-Document Summarization},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {213--223},
  abstract  = {Summarization aims to represent source documents by a shortened passage.
	Existing methods focus on the extraction of key information, but often neglect
	coherence. Hence the generated summaries suffer from a lack of readability. 
	To address this problem, we have developed a graph-based method by exploring
	the links between texts to produce coherent summaries.
	Our approach involves finding a sequence of sentences that best represent the
	key information in a coherent way. In contrast to the previous methods that
	focus only on salience, the proposed method addresses both coherence and
	informativeness based on textual linkages. We conduct experiments on the
	DUC2004 summarization task data set. A performance comparison reveals that the
	summaries generated by the proposed system achieve comparable results in terms
	of the ROUGE metric, and show improvements in readability by human evaluation.},
  url       = {http://aclweb.org/anthology/C16-1021}
}

@InProceedings{mcmahan-stone:2016:COLING,
  author    = {McMahan, Brian  and  Stone, Matthew},
  title     = {Syntactic realization with data-driven neural tree grammars},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {224--235},
  abstract  = {A key component in surface realization in natural language generation is to
	choose concrete syntactic relationships to express a target meaning. We develop
	a new method for syntactic choice based on learning a stochastic tree grammar
	in a neural architecture. This framework can exploit state-of-the-art methods
	for modeling word sequences and generalizing across vocabulary. We also induce
	embeddings to generalize over elementary tree structures and exploit a tree
	recurrence over the input structure to model long-distance influences between
	NLG choices. We evaluate the models on the task of linearizing unannotated
	dependency trees, documenting the contribution of our modeling techniques to
	improvements in both accuracy and run time.},
  url       = {http://aclweb.org/anthology/C16-1022}
}

@InProceedings{li-he-zhuge:2016:COLING,
  author    = {Li, Wei  and  He, Lei  and  Zhuge, Hai},
  title     = {Abstractive News Summarization based on Event Semantic Link Network},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {236--246},
  abstract  = {This paper studies the abstractive multi-document summarization for
	event-oriented news texts through event information extraction and abstract
	representation. Fine-grained event mentions and semantic relations between them
	are extracted to build a unified and connected event semantic link network, an
	abstract representation of source texts. A network reduction algorithm is
	proposed to summarize the most salient and coherent event information. New
	sentences with good linguistic quality are automatically generated and selected
	through sentence over-generation and greedy-selection processes. Experimental
	results on DUC 2006 and DUC 2007 datasets show that our system significantly
	outperforms the state-of-the-art extractive and abstractive baselines under
	both pyramid and ROUGE evaluation metrics.},
  url       = {http://aclweb.org/anthology/C16-1023}
}

@InProceedings{peyrard-ecklekohler:2016:COLING,
  author    = {Peyrard, Maxime  and  Eckle-Kohler, Judith},
  title     = {A General Optimization Framework for Multi-Document Summarization Using Genetic Algorithms and Swarm Intelligence},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {247--257},
  abstract  = {Integer linear programming and submodularity are popular and successful
	techniques for extracting summaries in multi-document summarization.
	However, many interesting optimization objectives are neither submodular nor
	factorizable into an integer linear program. We address this issue and present
	a general optimization framework where any function of input documents and a
	system summary can be plugged in. Our framework includes two kinds of
	summarizers -- one based on genetic algorithms, the other using a swarm
	intelligence approach. In our experimental evaluation, we investigate the
	optimization of two information-theoretic summary evaluation metrics and find
	that our framework yields competitive results compared to several strong
	summarization baselines. Our comparative analysis of the genetic and swarm
	summarizers reveals interesting complementary properties.},
  url       = {http://aclweb.org/anthology/C16-1024}
}

@InProceedings{rojasbarahona-EtAl:2016:COLING,
  author    = {Rojas Barahona, Lina M.  and  Gasic, Milica  and  Mrk\v{s}i\'{c}, Nikola  and  Su, Pei-Hao  and  Ultes, Stefan  and  Wen, Tsung-Hsien  and  Young, Steve},
  title     = {Exploiting Sentence and Context Representations in Deep Neural Models for Spoken Language Understanding},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {258--267},
  abstract  = {This paper presents a deep learning architecture for the semantic decoder
	component of a Statistical Spoken Dialogue System. In a slot-filling dialogue,
	the semantic decoder predicts the dialogue act and a set of slot-value pairs
	from a set of n-best hypotheses returned by the Automatic Speech Recognition
	module. Most current models for spoken language understanding assume (i)
	word-aligned semantic annotations as in sequence taggers and (ii)
	delexicalisation, or a mapping of input words to domain-specific concepts using
	heuristics that try to capture morphological variation but that do not scale to
	other domains nor to language variation (e.g., morphology, synonyms,
	paraphrasing). In this work
	the semantic decoder is trained using unaligned semantic annotations and it
	uses distributed semantic representation learning to overcome the limitations
	of explicit delexicalisation.  The proposed  architecture uses a convolutional
	neural network for the sentence representation and a
	long-short term memory network for the context representation. Results are
	presented for the publicly available DSTC2 corpus and an In-car corpus which is
	similar to DSTC2 but has a significantly higher word error rate (WER).},
  url       = {http://aclweb.org/anthology/C16-1025}
}

@InProceedings{kohn-baumann:2016:COLING,
  author    = {K\"{o}hn, Arne  and  Baumann, Timo},
  title     = {Predictive Incremental Parsing Helps Language Modeling},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {268--277},
  abstract  = {Predictive incremental parsing produces syntactic representations of
	sentences as they are produced, e.g. by typing or speaking.  In order
	to generate connected parses for such unfinished sentences, upcoming
	word types can be hypothesized and structurally integrated with
	already realized words.  For example, the presence of a determiner as
	the last word of a sentence prefix may indicate that a noun will
	appear somewhere in the completion of that sentence, and the
	determiner can be attached to the predicted noun.  We combine the
	forward-looking parser predictions with backward-looking N-gram
	histories and analyze in a set of experiments the impact on language
	models, i.e. stronger discriminative power but also higher data
	sparsity.  Conditioning N-gram models, MaxEnt models or RNN-LMs on
	parser predictions yields perplexity reductions of about 6\%.  Our
	method (a) retains online decoding capabilities and (b) incurs
	relatively little computational overhead which sets it apart from
	previous approaches that use syntax for language modeling.  Our method
	is particularly attractive for modular systems that make use of a
	syntax parser anyway, e.g. as part of an understanding pipeline where
	predictive parsing improves language modeling at no additional cost.},
  url       = {http://aclweb.org/anthology/C16-1026}
}

@InProceedings{wang-che-liu:2016:COLING,
  author    = {Wang, Shaolei  and  Che, Wanxiang  and  Liu, Ting},
  title     = {A Neural Attention Model for Disfluency Detection},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {278--287},
  abstract  = {In this paper, we study the problem of disfluency detection using the
	encoder-decoder framework. We treat disfluency detection as a
	sequence-to-sequence problem and propose a neural attention-based model which
	can efficiently model the long-range dependencies between words and make the
	resulting sentence more likely to be grammatically correct. Our model first
	encodes the source sentence with a bidirectional Long Short-Term Memory
	(BI-LSTM) network and then uses neural attention as a pointer to select an
	ordered subsequence of the input as the output. Experiments show that our model
	achieves the state-of-the-art f-score of  86.7\% on the commonly used English
	Switchboard test set. We also evaluate the performance of our model on the
	in-house annotated Chinese data and achieve a significantly higher f-score
	compared to a CRF-based baseline approach.},
  url       = {http://aclweb.org/anthology/C16-1027}
}

@InProceedings{hellwig:2016:COLING,
  author    = {Hellwig, Oliver},
  title     = {Detecting Sentence Boundaries in Sanskrit Texts},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {288--297},
  abstract  = {The paper applies a deep recurrent neural network to the task of sentence
	boundary detection in Sanskrit, an important, yet underresourced ancient Indian
	language. The deep learning approach improves the F scores set by a metrical
	baseline and by a Conditional Random Field classifier by more than 10\%.},
  url       = {http://aclweb.org/anthology/C16-1028}
}

@InProceedings{shen-EtAl:2016:COLING2,
  author    = {Shen, Mo  and  Li, Wingmui  and  Choe, HyunJeong  and  Chu, Chenhui  and  Kawahara, Daisuke  and  Kurohashi, Sadao},
  title     = {Consistent Word Segmentation, Part-of-Speech Tagging and Dependency Labelling Annotation for Chinese Language},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {298--308},
  abstract  = {In this paper, we propose a new annotation approach to Chinese word
	segmentation, part-of-speech (POS) tagging and dependency labelling that aims
	to overcome the two major issues in traditional morphology-based annotation:
	Inconsistency and data sparsity. We re-annotate the Penn Chinese Treebank 5.0
	(CTB5) and demonstrate the advantages of this approach compared to the original
	CTB5 annotation through word segmentation, POS tagging and machine translation
	experiments.},
  url       = {http://aclweb.org/anthology/C16-1029}
}

@InProceedings{rei-crichton-pyysalo:2016:COLING,
  author    = {Rei, Marek  and  Crichton, Gamal  and  Pyysalo, Sampo},
  title     = {Attending to Characters in Neural Sequence Labeling Models},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {309--318},
  abstract  = {Sequence labeling architectures use word embeddings for capturing similarity,
	but suffer when handling previously unseen or rare words. 
	We investigate character-level extensions to such models and propose a novel
	architecture for combining alternative word representations. 
	By using an attention mechanism, the model is able to dynamically decide how
	much information to use from a word- or character-level component. 
	We evaluated different architectures on a range of sequence labeling datasets,
	and character-level extensions were found to improve performance on every
	benchmark. 
	In addition, the proposed attention-based architecture delivered the best
	results even with a smaller number of trainable parameters.},
  url       = {http://aclweb.org/anthology/C16-1030}
}

@InProceedings{zhou-EtAl:2016:COLING1,
  author    = {Zhou, Nina  and  Aw, AiTi  and  Lertcheva, Nattadaporn  and  Wang, Xuancong},
  title     = {A Word Labeling Approach to Thai Sentence Boundary Detection and POS Tagging},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {319--327},
  abstract  = {Previous studies on Thai Sentence Boundary Detection (SBD) mostly treated it as
	a space disambiguation problem, classifying each space either as an indicator
	for Sentence Boundary (SB) or non-Sentence Boundary (nSB). In this paper, we
	propose a word labeling approach which treats space as a normal word and
	detects SB between any two words. This removes the restriction that an SB can
	occur only at a space and makes our system more robust for modern Thai writing,
	in which space is not consistently used to indicate SB. As syntactic
	information contributes to better SBD, we further propose a joint
	Part-Of-Speech (POS) tagging and SBD framework based on a Factorial Conditional
	Random Field (FCRF) model. We compare the performance of our proposed approach
	with reported methods on the ORCHID corpus. We also performed experiments with
	the FCRF model on the TaLAPi corpus. The results show that the word labeling
	approach performs better than previous space-based classification approaches
	and the FCRF joint model outperforms the LCRF model in terms of SBD in all
	experiments.},
  url       = {http://aclweb.org/anthology/C16-1031}
}

@InProceedings{horsmann-zesch:2016:COLING,
  author    = {Horsmann, Tobias  and  Zesch, Torsten},
  title     = {Assigning Fine-grained PoS Tags based on High-precision Coarse-grained Tagging},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {328--336},
  abstract  = {We propose a new approach to PoS tagging where in a first step, we assign a
	coarse-grained tag corresponding to the main syntactic category.
	Based on this high-precision decision, in the second step we utilize specially
	trained fine-grained models with heavily reduced decision complexity.
	By analyzing the system under oracle conditions, we show that there is quite a
	large potential for significantly outperforming a competitive baseline.
	When we take error-propagation from the coarse-grained tagging into account,
	our approach is on par with the state of the art.
	Our approach also allows tailoring the tagger towards recognizing single word
	classes which are of interest e.g. for researchers searching for specific
	phenomena in large corpora.
	In a case study, we significantly outperform a standard model that also makes
	use of the same optimizations.},
  url       = {http://aclweb.org/anthology/C16-1032}
}

@InProceedings{more-tsarfaty:2016:COLING,
  author    = {More, Amir  and  Tsarfaty, Reut},
  title     = {Data-Driven Morphological Analysis and Disambiguation for Morphologically Rich Languages and Universal Dependencies},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {337--348},
  abstract  = {Parsing texts into universal dependencies (UD) in realistic scenarios requires
	infrastructure for the morphological analysis and disambiguation (MA\&D) of
	typologically different languages as a first tier. MA\&D  is particularly
	challenging in morphologically rich languages (MRLs), where the  ambiguous
	space-delimited tokens ought to be disambiguated with respect to their
	constituent morphemes, each morpheme carrying its own tag and a rich set
	features. Here we present a novel, language-agnostic, framework for MA\&D, based
	on a transition system with two  variants --- word-based and morpheme-based ---
	and a dedicated transition to mitigate the biases of variable-length morpheme
	sequences. Our experiments on a  Modern Hebrew case study show state of the art
	results, and we show that the morpheme-based MD consistently outperforms our
	word-based variant. We further illustrate the utility and multilingual coverage
	 of our framework by morphologically analyzing and disambiguating the large set
	of languages in the UD treebanks.},
  url       = {http://aclweb.org/anthology/C16-1033}
}

@InProceedings{gyanendrosingh-laitonjam-ranbirsingh:2016:COLING,
  author    = {Gyanendro Singh, Loitongbam  and  Laitonjam, Lenin  and  Ranbir Singh, Sanasam},
  title     = {Automatic Syllabification for Manipuri language},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {349--357},
  abstract  = {Developing hand-crafted rules for syllabifying words of a language is an
	expensive task. 
	This paper proposes several data-driven methods for automatic syllabification
	of words written in Manipuri language. Manipuri is one of the scheduled Indian
	languages. First, we propose a language-independent rule-based approach
	formulated using entropy based phonotactic segmentation. Second, we project the
	syllabification problem as a sequence labeling problem and investigate its
	effect using various sequence labeling approaches. Third, we combine the effect
	of sequence labeling and rule-based method and investigate the performance of
	the hybrid approach. 
	From various experimental observations, it is evident that the proposed methods
	outperform the baseline rule-based method. 
	The entropy-based phonotactic segmentation provides a word accuracy of 96\%,
	CRF (the sequence labeling approach) provides 97\%, and the hybrid approach
	provides 98\% word accuracy.},
  url       = {http://aclweb.org/anthology/C16-1034}
}

@InProceedings{chen-EtAl:2016:COLING1,
  author    = {Chen, Kuan-Yu  and  Liu, Shih-Hung  and  Chen, Berlin  and  Wang, Hsin-Min},
  title     = {Learning to Distill: The Essence Vector Modeling Framework},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {358--368},
  abstract  = {In the context of natural language processing, representation learning has
	emerged as a newly active research subject because of its excellent performance
	in many applications. Learning representations of words is a pioneering study
	in
	this school of research. However, paragraph (or sentence and document)
	embedding learning is more suitable/reasonable for some tasks, such as
	sentiment classification and document summarization. Nevertheless, as far as we
	are aware, there is only a dearth of research focusing on launching
	unsupervised paragraph embedding methods. Classic paragraph embedding methods
	infer the representation of a given paragraph by considering all of the words
	occurring in the paragraph. Consequently, those stop or function words that
	occur frequently may mislead the embedding learning process to produce a misty
	paragraph representation. Motivated by these observations, our major
	contributions are twofold. First, we propose a novel unsupervised paragraph
	embedding method, named the essence vector (EV) model, which aims at not only
	distilling the most representative information from a paragraph but also
	excluding the general background information to produce a more informative
	low-dimensional vector representation for the paragraph. We evaluate the
	proposed EV model on benchmark sentiment classification and multi-document
	summarization tasks. The experimental results demonstrate the effectiveness and
	applicability of the proposed embedding method. Second, in view of the
	increasing importance of spoken content processing, an extension of the EV
	model, named the denoising essence vector (D-EV) model, is proposed. The D-EV
	model not only inherits the advantages of the EV model but also can infer a
	more robust representation for a given spoken paragraph against imperfect
	speech recognition. The utility of the D-EV model is evaluated on a spoken
	document summarization task, confirming the effectiveness of the proposed
	embedding method in relation to several well-practiced and state-of-the-art
	summarization methods.},
  url       = {http://aclweb.org/anthology/C16-1035}
}

@InProceedings{lorenzotrueba-EtAl:2016:COLING,
  author    = {Lorenzo-Trueba, Jaime  and  Barra-Chicote, Roberto  and  Gallardo-Antolin, Ascension  and  Yamagishi, Junichi  and  Montero, Juan M},
  title     = {Continuous Expressive Speaking Styles Synthesis based on CVSM and MR-HMM},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {369--376},
  abstract  = {This paper introduces a continuous system capable of automatically producing
	the most adequate speaking style to synthesize a desired target text. This is
	done thanks to a joint modeling of the acoustic and lexical parameters of the
	speaker models by adapting the CVSM projection of the training texts using
	MR-HMM techniques. As such, we consider that as long as sufficient variety in
	the training data is available, we should be able to model a continuous lexical
	space into a continuous acoustic space. The proposed continuous automatic text
	to speech system was evaluated by means of a perceptual evaluation in order to
	compare it with traditional approaches to the task. The system proved to be
	capable of conveying the correct expressiveness (average adequacy of 3.6) with
	an expressive strength comparable to oracle traditional expressive speech
	synthesis (average of 3.6) although with a drop in speech quality mainly due to
	the semi-continuous nature of the data (average quality of 2.9). This means
	that the proposed system is capable of improving traditional neutral systems
	without requiring any additional user interaction.},
  url       = {http://aclweb.org/anthology/C16-1036}
}

@InProceedings{dominguez-farrus-wanner:2016:COLING,
  author    = {Dominguez, Monica  and  Farr\'{u}s, Mireia  and  Wanner, Leo},
  title     = {An Automatic Prosody Tagger for Spontaneous Speech},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {377--386},
  abstract  = {Speech prosody is known to be central in advanced communication technologies.
	However, despite the advances of theoretical studies in speech prosody, so far,
	no large scale prosody annotated resources that would facilitate empirical
	research and the development of empirical computational approaches are
	available. This is to a large extent due to the fact that current common
	prosody annotation conventions offer a descriptive framework of intonation
	contours and phrasing based on labels. This makes it difficult to reach a
	satisfactory inter-annotator agreement during the annotation of gold standard
	annotations and, subsequently, to create consistent large scale annotations. To
	address this problem, we present an annotation schema for prominence and
	boundary labeling of prosodic phrases based upon acoustic parameters and a
	tagger for prosody annotation at the prosodic phrase level. Evaluation proves
	that inter-annotator agreement reaches satisfactory values, from 0.60 to 0.80
	Cohen's kappa, while the prosody tagger achieves acceptable recall and
	f-measure figures for five spontaneous samples used in the evaluation of
	monologue and dialogue formats in English and Spanish. The work presented in
	this paper is a first step towards a semi-automatic acquisition of large
	corpora for empirical prosodic analysis.},
  url       = {http://aclweb.org/anthology/C16-1037}
}

@InProceedings{kim-stratos-sarikaya:2016:COLING1,
  author    = {Kim, Young-Bum  and  Stratos, Karl  and  Sarikaya, Ruhi},
  title     = {Frustratingly Easy Neural Domain Adaptation},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {387--396},
  abstract  = {Popular techniques for domain adaptation such as the feature augmentation
	method of Daum\'{e} III (2009) have mostly been considered for sparse
	binary-valued features, but not for dense real-valued features such as those
	used in neural networks. In this paper, we describe simple neural extensions
	of these techniques. First, we propose a natural generalization of the feature
	augmentation method that uses K + 1 LSTMs where one model captures global
	patterns across all K domains and the remaining K models capture
	domain-specific information. Second, we propose a novel application of the
	framework for learning shared structures by Ando and Zhang (2005) to domain
	adaptation, and also provide a neural extension of their approach. In
	experiments on slot tagging over 17 domains, our methods give clear
	performance improvement over Daum\'{e} III (2009) applied on feature-rich
	CRFs.},
  url       = {http://aclweb.org/anthology/C16-1038}
}

@InProceedings{bhat-EtAl:2016:COLING,
  author    = {Bhat, Riyaz A.  and  Bhat, Irshad A.  and  Jain, Naman  and  Sharma, Dipti Misra},
  title     = {A House United: Bridging the Script and Lexical Barrier between Hindi and Urdu},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {397--408},
  abstract  = {In Computational Linguistics, Hindi and Urdu are not viewed as a monolithic
	entity and have received separate attention with respect to their text
	processing. From part-of-speech tagging to machine translation, models are
	separately trained for both Hindi and Urdu despite the fact that they
	represent the same language. The main reasons are their divergent literary
	vocabularies and separate orthographies, and probably also their political
	status and the social perception that they are two separate languages. In this
	article, we propose a simple but efficient approach to bridge the lexical and
	orthographic differences between Hindi and Urdu texts. With respect to text
	processing, addressing the differences between the Hindi and Urdu texts would
	be beneficial in the following ways: (a) instead of training separate models,
	their individual resources can be augmented to train single, unified models for
	better generalization, and (b) their individual text processing applications
	can be used interchangeably under varied resource conditions.
	To remove the script barrier, we learn accurate statistical transliteration
	models which use sentence-level decoding to resolve word ambiguity. Similarly,
	we learn cross-register word embeddings from the harmonized Hindi and Urdu
	corpora to nullify their lexical divergences. As a proof of concept, we
	evaluate our approach on the Hindi and Urdu dependency parsing under two
	scenarios: (a) resource sharing, and (b) resource augmentation. We demonstrate
	that a neural network-based dependency parser trained on augmented, harmonized
	Hindi and Urdu resources performs significantly better than the parsing models
	trained separately on the individual resources. We also show that we can
	achieve near state-of-the-art results when the parsers are used
	interchangeably.},
  url       = {http://aclweb.org/anthology/C16-1039}
}

@InProceedings{michalon-EtAl:2016:COLING,
  author    = {Michalon, Olivier  and  Ribeyre, Corentin  and  Candito, Marie  and  Nasr, Alexis},
  title     = {Deeper syntax for better semantic parsing},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {409--420},
  abstract  = {Syntax plays an important role in the task of predicting the semantic structure
	of a sentence. But syntactic phenomena such as alternations, control and
	raising tend to obfuscate the relation between syntax and semantics. In this
	paper we predict the semantic structure of a sentence using a deeper syntax
	than what is usually done. This deep syntactic representation abstracts away
	from purely syntactic phenomena and proposes a structural organization of the
	sentence that is closer to the semantic representation. Experiments conducted
	on a French corpus annotated with semantic frames showed that a semantic parser
	achieves better performance with such a deep syntactic input.},
  url       = {http://aclweb.org/anthology/C16-1040}
}

@InProceedings{lee-wang:2016:COLING,
  author    = {Lee, Young-Suk  and  Wang, Zhiguo},
  title     = {Language Independent Dependency to Constituent Tree Conversion},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {421--428},
  abstract  = {We present a dependency to constituent tree conversion technique that aims to
	improve constituent parsing accuracies by leveraging dependency treebanks
	available in a wide variety in many languages. The technique works in two
	steps. First, a partial constituent tree is derived from a dependency tree with
	a very simple deterministic algorithm that is both language and dependency type
	independent. Second, a complete high accuracy constituent tree is derived with
	a constraint-based parser, which uses the partial constituent
	tree as external constraints. Evaluated on Section 22 of the WSJ Treebank, the
	technique achieves a state-of-the-art conversion F-score of 95.6. When applied
	to the English Universal Dependency treebank and the German CoNLL2006 treebank, the
	converted treebanks added to the human-annotated constituent parser training
	corpus improve parsing F-scores significantly for both languages.},
  url       = {http://aclweb.org/anthology/C16-1041}
}

@InProceedings{waszczuk-savary-parmentier:2016:COLING,
  author    = {Waszczuk, Jakub  and  Savary, Agata  and  Parmentier, Yannick},
  title     = {Promoting multiword expressions in A* TAG parsing},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {429--439},
  abstract  = {Multiword expressions (MWEs) are pervasive in natural languages and often have
	both idiomatic and compositional readings, which leads to high syntactic
	ambiguity. We show that for some MWE types idiomatic readings are usually the
	correct ones. We propose a heuristic for an A* parser for Tree Adjoining
	Grammars which benefits from this knowledge by promoting MWE-oriented analyses.
	This strategy leads to a substantial reduction in the parsing search space in
	case of true positive MWE occurrences, while avoiding parsing failures in case
	of false positives.},
  url       = {http://aclweb.org/anthology/C16-1042}
}

@InProceedings{ulinski-hirschberg-rambow:2016:COLING,
  author    = {Ulinski, Morgan  and  Hirschberg, Julia  and  Rambow, Owen},
  title     = {Incrementally Learning a Dependency Parser to Support Language Documentation in Field Linguistics},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {440--449},
  abstract  = {We present experiments in incrementally learning a dependency parser. The
	parser will be used in the WordsEye Linguistics Tools (WELT) (Ulinski et al.,
	2014) which supports field linguists documenting a language’s syntax and
	semantics. Our goal is to make syntactic annotation faster for field linguists.
	We have created a new parallel corpus of descriptions of spatial relations and
	motion events, based on pictures and video clips used by field linguists for
	elicitation of language from native speaker informants. We collected
	descriptions for each picture and video from native speakers in English,
	Spanish, German, and Egyptian Arabic. We compare the performance of MSTParser
	(McDonald et al., 2006) and MaltParser (Nivre et al., 2006) when trained on
	small amounts of this data. We find that MaltParser achieves the best
	performance. We also present the results of experiments using the parser to
	assist with annotation. We find that even when the parser is trained on a
	single sentence from the corpus, annotation time significantly decreases.},
  url       = {http://aclweb.org/anthology/C16-1043}
}

@InProceedings{zennaki-semmar-besacier:2016:COLING,
  author    = {Zennaki, Othman  and  Semmar, Nasredine  and  Besacier, Laurent},
  title     = {Inducing Multilingual Text Analysis Tools Using Bidirectional Recurrent Neural Networks},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {450--460},
  abstract  = {This work focuses on the development of linguistic analysis tools for
	resource-poor languages. We use a parallel corpus to produce a multilingual
	word representation based only on sentence level alignment. This representation
	is combined with the annotated source side (resource-rich language) of the
	parallel corpus to train text analysis tools for resource-poor languages. Our
	approach is based on Recurrent Neural Networks (RNN) and has the following
	advantages: (a) it does not use word alignment information, (b) it does not
	assume any knowledge about foreign languages, which makes it applicable to a
	wide range of resource-poor languages, (c) it provides truly multilingual
	taggers.  
	In a previous study, we proposed a method based on Simple RNN to automatically
	induce a Part-Of-Speech (POS) tagger. In this paper, we propose an improvement
	of our neural model. We investigate the Bidirectional RNN and the inclusion of
	external information (for instance low level information from Part-Of-Speech
	tags) in the RNN to train a more complex tagger (for instance, a multilingual
	super sense tagger). We demonstrate the validity and genericity of our method
	by using parallel corpora (obtained by manual or automatic translation). Our
	experiments are conducted to induce cross-lingual POS and super sense taggers.},
  url       = {http://aclweb.org/anthology/C16-1044}
}

@InProceedings{zhang-EtAl:2016:COLING1,
  author    = {Zhang, Dongxu  and  Zhang, Boliang  and  Pan, Xiaoman  and  Feng, Xiaocheng  and  Ji, Heng  and  Xu, Weiran},
  title     = {Bitext Name Tagging for Cross-lingual Entity Annotation Projection},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {461--470},
  abstract  = {Annotation projection is a practical method to deal with the low resource
	problem in incident languages (IL) processing. Previous methods on annotation
	projection mainly relied on word alignment results without any training
	process, which led to noise propagation caused by word alignment errors. In
	this paper, we focus on the named entity recognition (NER) task and propose a
	weakly-supervised framework to project entity annotations from English to IL
	through bitexts. Instead of directly relying on word alignment results, this
	framework combines advantages of rule-based methods and deep learning methods
	by implementing two steps: first, it generates a high-confidence entity
	annotation set on the IL side with strict searching methods; second, it uses
	this high-confidence set to weakly supervise the model training. The model is then used to
	accomplish the projecting process. Experimental results on two low-resource ILs
	show that the proposed method can generate better annotations projected from
	English-IL parallel corpora. The performance of the IL name tagger can also be
	improved significantly by training on the newly projected IL annotation set.},
  url       = {http://aclweb.org/anthology/C16-1045}
}

@InProceedings{salehi-cook-baldwin:2016:COLING,
  author    = {Salehi, Bahar  and  Cook, Paul  and  Baldwin, Timothy},
  title     = {Determining the Multiword Expression Inventory of a Surprise Language},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {471--481},
  abstract  = {Much previous research on multiword expressions (MWEs) has focused on the
	token- and type-level tasks of MWE identification and extraction, respectively.
	Such studies typically target known prevalent MWE types in a given language.
	This paper describes the first attempt to learn the MWE inventory of a
	“surprise” language for which we have no explicit prior knowledge of MWE
	patterns, certainly no annotated MWE data, and not even a parallel corpus. Our
	proposed model is trained on a treebank with MWE relations of a source
	language, and can be applied to the monolingual corpus of the surprise language
	to identify its MWE construction types.},
  url       = {http://aclweb.org/anthology/C16-1046}
}

@InProceedings{akhtar-EtAl:2016:COLING,
  author    = {Akhtar, Md Shad  and  Kumar, Ayush  and  Ekbal, Asif  and  Bhattacharyya, Pushpak},
  title     = {A Hybrid Deep Learning Architecture for Sentiment Analysis},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {482--493},
  abstract  = {In this paper, we propose a novel hybrid deep learning architecture which is
	highly efficient for sentiment analysis in resource-poor languages. We learn
	sentiment-embedded vectors from a Convolutional Neural Network (CNN). These
	are augmented with a set of optimized features selected through a multi-objective
	optimization (MOO) framework. The sentiment augmented optimized vector obtained
	at the end is used for the training of SVM for sentiment classification. We
	evaluate our proposed approach for coarse-grained (i.e. sentence level) as well
	as fine-grained (i.e. aspect level) sentiment analysis on four Hindi datasets
	covering varying domains. In order to show that our proposed method is generic
	in nature we also evaluate it on two benchmark English datasets. Evaluation
	shows that the results of the proposed method are consistent across all the
	datasets and often outperform the state-of-the-art systems. To the best of our
	knowledge, this is the first attempt where such a deep learning model is
	used for less-resourced languages such as Hindi.},
  url       = {http://aclweb.org/anthology/C16-1047}
}

@InProceedings{krishna-EtAl:2016:COLING,
  author    = {Krishna, Amrith  and  Santra, Bishal  and  Satuluri, Pavankumar  and  Bandaru, Sasi Prasanth  and  Faldu, Bhumi  and  Singh, Yajuvendra  and  Goyal, Pawan},
  title     = {Word Segmentation in Sanskrit Using Path Constrained Random Walks},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {494--504},
  abstract  = {In Sanskrit, the phonemes at the word boundaries undergo changes to form new
	phonemes through a process called sandhi. A fused sentence can be segmented
	into multiple possible segmentations. We propose a word segmentation approach
	that predicts the most semantically valid segmentation for a given sentence. We
	treat the problem as a query expansion problem and use the path-constrained
	random walks framework to predict the correct segments.},
  url       = {http://aclweb.org/anthology/C16-1048}
}

@InProceedings{wang-bao-gao:2016:COLING,
  author    = {Wang, Weihua  and  Bao, Feilong  and  Gao, Guanglai},
  title     = {Mongolian Named Entity Recognition System with Rich Features},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {505--512},
  abstract  = {In this paper, we first build a manually annotated named entity corpus of
	Mongolian. Then, we propose three morphological processing methods and study
	comprehensive features, including syllable features, lexical features, context
	features, morphological features and semantic features in Mongolian named
	entity recognition. Moreover, we also evaluate the influence of word cluster
	features on the system and combine all features together eventually. The
	experimental result shows that segmenting each suffix into an individual token
	achieves better results than deleting suffixes or using the suffixes as
	features. The system based on segmenting suffixes with all proposed features
	yields a benchmark result of F-measure=84.65 on this corpus.},
  url       = {http://aclweb.org/anthology/C16-1049}
}

@InProceedings{shafieibavani-EtAl:2016:COLING,
  author    = {ShafieiBavani, Elaheh  and  Ebrahimi, Mohammad  and  Wong, Raymond  and  Chen, Fang},
  title     = {Appraising UMLS Coverage for Summarizing Medical Evidence},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {513--524},
  abstract  = {When making clinical decisions, practitioners need to rely on the most relevant
	evidence available. However, accessing a vast body of medical evidence and
	confronting the issue of information overload can be challenging and time
	consuming. This paper proposes an effective summarizer for medical evidence by
	utilizing both UMLS and WordNet. Given a clinical query and a set of relevant
	abstracts, our aim is to generate a fluent, well-organized, and compact summary
	that answers the query. Analysis via ROUGE metrics shows that using WordNet as
	a general-purpose lexicon helps to capture the concepts not covered by the UMLS
	Metathesaurus, and hence significantly increases the performance. The
	effectiveness of our proposed approach is demonstrated by conducting a set of
	experiments over a specialized evidence-based medicine (EBM) corpus - which has
	been gathered and annotated for the purpose of biomedical text summarization.},
  url       = {http://aclweb.org/anthology/C16-1050}
}

@InProceedings{cevahir-murakami:2016:COLING,
  author    = {Cevahir, Ali  and  Murakami, Koji},
  title     = {Large-scale Multi-class and Hierarchical Product Categorization for an E-commerce Giant},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {525--535},
  abstract  = {In order to organize the large number of products listed in e-commerce sites,
	each product is usually assigned to one of the multi-level categories in the
	taxonomy tree. It is a time-consuming and difficult task for merchants to
	select proper categories within thousands of options for the products they
	sell. In this work, we propose an automatic classification tool to predict the
	matching category for a given product title and description. We used a
	combination of two different neural models, i.e., deep belief nets and deep
	autoencoders, for both titles and descriptions. We implemented a selective
	reconstruction approach for the input layer during the training of the deep
	neural networks, in order to scale out to large-sized sparse feature vectors.
	GPUs are utilized in order to train neural networks in a reasonable time.  We
	have trained our models for around 150 million products with a taxonomy tree
	with at most 5 levels that contains 28,338 leaf categories. Tests with millions
	of products show that our first predictions match 81% of merchants'
	assignments, when "others" categories are excluded.},
  url       = {http://aclweb.org/anthology/C16-1051}
}

@InProceedings{gupta-EtAl:2016:COLING1,
  author    = {Gupta, Vivek  and  Karnick, Harish  and  Bansal, Ashendra  and  Jhala, Pradhuman},
  title     = {Product Classification in E-Commerce using Distributional Semantics},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {536--546},
  abstract  = {Product classification is the task of automatically predicting a taxonomy path
	for a product in a  predefined taxonomy hierarchy given a textual product
	description or title. For efficient product classification we require a
	suitable representation for a document (the textual description of a product)
	feature vector and efficient and fast algorithms for prediction.To address the
	above challenges, we propose a new distributional semantics representation for
	document vector formation. We also develop a new two-level ensemble approach
	utilising (with respect to the taxonomy tree) path-wise, node-wise and
	depth-wise classifiers to reduce error in the final product classification
	task. Our experiments show the effectiveness of the distributional
	representation and the ensemble approach on data sets from a leading e-commerce
	platform and achieve improved results on various evaluation metrics compared to
	earlier approaches.},
  url       = {http://aclweb.org/anthology/C16-1052}
}

@InProceedings{cao-EtAl:2016:COLING1,
  author    = {Cao, Ziqiang  and  Li, Wenjie  and  Li, Sujian  and  Wei, Furu  and  Li, Yanran},
  title     = {AttSum: Joint Learning of Focusing and Summarization with Neural Attention},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {547--556},
  abstract  = {Query relevance ranking and sentence saliency ranking are the two main tasks in
	extractive query-focused summarization.  Previous supervised summarization
	systems often perform the two tasks in isolation. However, since reference
	summaries are a trade-off between relevance and saliency, neither of the two
	rankers can be trained well when using them as supervision. This paper
	proposes a novel summarization system called AttSum, which tackles the two
	tasks jointly. It automatically learns distributed representations for
	sentences as well as the document cluster. Meanwhile, it applies the attention
	mechanism to simulate human attentive reading behavior when a query is
	given. Extensive experiments are conducted on DUC query-focused summarization
	benchmark datasets. Without using any hand-crafted features, AttSum achieves
	competitive performance. We also observe that the sentences recognized to focus
	on the query indeed meet the query need.},
  url       = {http://aclweb.org/anthology/C16-1053}
}

@InProceedings{li-EtAl:2016:COLING2,
  author    = {Li, Chen  and  Wei, Zhongyu  and  Liu, Yang  and  Jin, Yang  and  Huang, Fei},
  title     = {Using Relevant Public Posts to Enhance News Article Summarization},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {557--566},
  abstract  = {A news article summary usually consists of 2-3 key sentences that reflect the
	gist of that news article. In this paper we explore using public posts
	following a news article to improve automatic summary generation for the news
	article. We propose different approaches to incorporate information from public
	posts, including using frequency information from the posts to re-estimate
	bigram weights in the ILP-based summarization model and to re-weight a
	dependency tree edge's importance for sentence compression, directly selecting
	sentences from posts as the final summary, and finally a strategy to combine
	the summarization results generated from news articles and posts. Our
	experiments on data collected from Facebook show that relevant public posts
	provide useful information and can be effectively leveraged to improve news
	article summarization results.},
  url       = {http://aclweb.org/anthology/C16-1054}
}

@InProceedings{fang-EtAl:2016:COLING,
  author    = {Fang, Yimai  and  Zhu, Haoyue  and  Muszy\'{n}ska, Ewa  and  Kuhnle, Alexander  and  Teufel, Simone},
  title     = {A Proposition-Based Abstractive Summariser},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {567--578},
  abstract  = {Abstractive summarisation is not yet common amongst today's deployed and
	research systems. Most existing systems either extract sentences or compress
	individual sentences.
	In this paper, we present a summariser that works by a different paradigm. It
	is a further development of an existing summariser that has an incremental,
	proposition-based content selection process but lacks a natural language (NL)
	generator for the final output. Using an NL generator, we can now produce the
	summary text to directly reflect the selected propositions. Our evaluation
	compares textual quality of our system to the earlier preliminary output
	method, and also uses ROUGE to compare to various summarisers that use the
	traditional method of sentence extraction, followed by compression. Our results
	suggest that cutting out the middle-man of sentence extraction can lead to
	better abstractive summaries.},
  url       = {http://aclweb.org/anthology/C16-1055}
}

@InProceedings{evang-bos:2016:COLING,
  author    = {Evang, Kilian  and  Bos, Johan},
  title     = {Cross-lingual Learning of an Open-domain Semantic Parser},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {579--588},
  abstract  = {We propose a method for learning semantic CCG parsers by projecting annotations
	via a parallel corpus. The method opens an avenue towards cheaply creating
	multilingual semantic parsers mapping open-domain text to formal meaning
	representations. A first cross-lingually learned Dutch (from English) semantic
	parser obtains f-scores ranging from 42.99% to 69.22% depending on the level of
	label informativity taken into account, compared to 58.40% to 78.88% for the
	underlying source-language system. These are promising numbers compared to
	state-of-the-art semantic parsing in open domains.},
  url       = {http://aclweb.org/anthology/C16-1056}
}

@InProceedings{zhao-liu:2016:COLING,
  author    = {Zhao, Qiuye  and  Liu, Qun},
  title     = {A subtree-based factorization of dependency parsing},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {589--598},
  abstract  = {We propose a dependency parsing pipeline in which the parsing of long-distance
	projections and of localized dependencies is explicitly decomposed at the input
	level. A chosen baseline dependency parsing model performs only on ‘carved’
	sequences at the second stage, which are transformed from coarse constituent
	parsing outputs at the first stage. When k-best constituent parsing outputs are
	kept, a third stage is required to search for an optimal combination of the
	overlapped dependency subtrees. In this sense, our dependency model is
	subtree-factored. We explore alternative approaches for scoring subtrees,
	including feature-based models as well as continuous representations. The
	search for the optimal subset to combine is formulated as an ILP problem. This
	framework especially benefits models that perform poorly on long sentences,
	generally improving baselines by 0.75-1.28 (UAS) on English and achieving
	performance comparable to high-order models, but faster. For Chinese, the most
	notable increase is as high as 3.63 (UAS) when the proposed framework is
	applied to first-order parsing models.},
  url       = {http://aclweb.org/anthology/C16-1057}
}

@InProceedings{akbik-li:2016:COLING,
  author    = {Akbik, Alan  and  Li, Yunyao},
  title     = {K-SRL: Instance-based Learning for Semantic Role Labeling},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {599--608},
  abstract  = {Semantic role labeling (SRL) is the task of identifying and labeling
	predicate-argument structures in sentences with semantic frame and role labels.
	A known challenge in SRL is the large number of low-frequency exceptions in
	training data, which are highly context-specific and difficult to generalize.
	To overcome this challenge, we propose the use of instance-based learning that
	performs no explicit generalization, but rather extrapolates predictions from
	the most similar instances in the training data. We present a variant of
	k-nearest neighbors (kNN) classification with composite features to identify
	nearest neighbors for SRL. We show that high-quality predictions can be derived
	from a very small number of similar instances. In a comparative evaluation we
	experimentally demonstrate that our instance-based learning approach
	significantly outperforms current state-of-the-art systems on both in-domain
	and out-of-domain data, reaching F1-scores of 89.28% and 79.91%, respectively.},
  url       = {http://aclweb.org/anthology/C16-1058}
}

@InProceedings{plank:2016:COLING,
  author    = {Plank, Barbara},
  title     = {Keystroke dynamics as signal for shallow syntactic parsing},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {609--619},
  abstract  = {Keystroke dynamics have been extensively used in psycholinguistic and writing
	research to gain insights into cognitive processing. 
	But do keystroke logs contain actual signal that can be used to learn better
	natural language processing models?
	We postulate that keystroke dynamics contain information about syntactic
	structure that can inform shallow syntactic parsing. To test this hypothesis, 
	we explore labels derived from keystroke logs as an auxiliary task in a multi-task
	bidirectional Long Short-Term Memory (bi-LSTM). We obtain promising
	results on two shallow syntactic parsing tasks, chunking and CCG supertagging.
	Our model is simple, has the advantage that data can come from distinct
	sources, and produces models that are significantly better than models trained
	on the text annotations alone.},
  url       = {http://aclweb.org/anthology/C16-1059}
}

@InProceedings{ostling:2016:COLING,
  author    = {\"{O}stling, Robert},
  title     = {A Bayesian model for joint word alignment and part-of-speech transfer},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {620--629},
  abstract  = {Current methods for word alignment require considerable amounts of
	parallel text to deliver accurate results, a requirement which is met only for
	a small minority of the world's approximately 7,000 languages.
	We show that by jointly performing word alignment and annotation transfer in
	a novel Bayesian model, alignment accuracy can be
	improved for language pairs where annotations are available for only
	one of the languages---a finding which could facilitate the study and
	processing of a vast number of low-resource languages.
	We also present an evaluation where our method is used to perform
	single-source and multi-source part-of-speech transfer with 22 translations
	of the same text in four different languages. This allows us to quantify the
	considerable variation in accuracy depending on the specific source text(s)
	used, even with different translations into the same language.},
  url       = {http://aclweb.org/anthology/C16-1060}
}

@InProceedings{shapiro:2016:COLING,
  author    = {Shapiro, Naomi Tachikawa},
  title     = {Splitting compounds with ngrams},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {630--640},
  abstract  = {Compound words with unmarked word boundaries are problematic for many tasks in
	NLP and computational linguistics, including information extraction, machine
	translation, and syllabification. This paper introduces a simple,
	proof-of-concept language modeling approach to automatic compound segmentation,
	as applied to Finnish. This approach utilizes an off-the-shelf morphological
	analyzer to split training words into their constituent morphemes. A language
	model is subsequently trained on ngrams composed of morphemes, morpheme
	boundaries, and word boundaries. Linguistic constraints are then used to weed
	out phonotactically ill-formed segmentations, thereby allowing the language
	model to select the best grammatical segmentation. This approach achieves an
	accuracy of ~97\%.},
  url       = {http://aclweb.org/anthology/C16-1061}
}

@InProceedings{feng-EtAl:2016:COLING1,
  author    = {Feng, Jun  and  Huang, Minlie  and  Yang, Yang  and  Zhu, Xiaoyan},
  title     = {GAKE: Graph Aware Knowledge Embedding},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {641--651},
  abstract  = {Knowledge embedding, which projects triples in a given knowledge base to
	d-dimensional vectors, has attracted considerable research efforts recently.
	Most existing approaches treat the given knowledge base as a set of triplets,
	each of whose representation is then learned separately. However, in fact,
	triples are connected and depend on each other. In this paper, we propose a
	graph aware knowledge embedding method (GAKE), which formulates the knowledge
	base as a directed graph and learns representations for any vertices or edges
	by leveraging the graph’s structural information. We introduce three types of
	graph context for embedding: neighbor context, path context, and edge context,
	each of which reflects properties of knowledge from different perspectives. We
	also design an attention mechanism to learn the representative power of
	different vertices or edges. To validate our method, we conduct several
	experiments on two tasks. Experimental results suggest that our method
	outperforms several state-of-the-art knowledge embedding models.},
  url       = {http://aclweb.org/anthology/C16-1062}
}

@InProceedings{wu-wang-xue:2016:COLING,
  author    = {Wu, Bowen  and  Wang, Baoxun  and  Xue, Hui},
  title     = {Ranking Responses Oriented to Conversational Relevance in Chat-bots},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {652--662},
  abstract  = {For automatic chatting systems, it is indeed a great challenge to reply to the
	given query considering the conversation history, rather than based on the
	query only. This paper proposes a deep neural network to address the
	context-aware response ranking problem by end-to-end learning, so as to help
	select conversationally relevant candidates. By combining the multi-column
	convolutional layer and the recurrent layer, our model is able to model the
	semantics of the utterance sequence by grasping the semantic clue within the
	conversation, on the basis of the effective representation for each sentence.
	In particular, the network utilizes attention pooling to further emphasize the
	importance of essential words in conversations, thus the representations of
	contexts tend to be more meaningful and the performance of candidate ranking is
	notably improved. Meanwhile, due to the adoption of attention pooling, it is
	possible to visualize the semantic clues. The experimental results on a large
	amount of conversation data from social media show that our approach is
	promising for quantifying the conversational relevance of responses, and
	indicate its potential for building practical IR-based chat-bots.},
  url       = {http://aclweb.org/anthology/C16-1063}
}

@InProceedings{lee-hwang-wang:2016:COLING,
  author    = {Lee, Taesung  and  Hwang, Seung-won  and  Wang, Zhongyuan},
  title     = {Probabilistic Prototype Model for Serendipitous Property Mining},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {663--673},
  abstract  = {Besides providing relevant information, amusing users has been an important
	role of the web. Many web sites provide serendipitous (unexpected but relevant)
	information to draw user traffic. In this paper, we study the representative
	scenario of mining an amusing quiz. An existing approach leverages a knowledge
	base to mine an unexpected property and then finds quiz questions on that
	property, based on prototype theory in cognitive science. However, the existing
	deterministic model is vulnerable to noise in the knowledge base. We therefore
	propose a probabilistic approach to build a prototype that can overcome noise.
	Our extensive empirical study shows that our approach not only significantly
	outperforms baselines by 0.06 in accuracy and 0.11 in serendipity, but also
	shows higher relevance than the traditional relevance-pursuing baseline using
	TF-IDF.},
  url       = {http://aclweb.org/anthology/C16-1064}
}

@InProceedings{garimella-mihalcea-pennebaker:2016:COLING,
  author    = {Garimella, Aparna  and  Mihalcea, Rada  and  Pennebaker, James},
  title     = {Identifying Cross-Cultural Differences in Word Usage},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {674--683},
  abstract  = {Personal writings have inspired researchers in the fields of linguistics and
	psychology to study the relationship between language and culture to better
	understand the psychology of people across different cultures. 
	In this paper, we explore this relation by developing cross-cultural word
	models to identify words with cultural bias -- i.e., words that are used in
	significantly different ways by speakers from different cultures. 
	Focusing specifically on two cultures: United States and Australia, we identify
	a set of words with significant usage differences, and further investigate
	these words through feature analysis and topic modeling, shedding light on the 
	attributes of language that contribute to these differences.},
  url       = {http://aclweb.org/anthology/C16-1065}
}

@InProceedings{asahara-ono-miyamoto:2016:COLING,
  author    = {Asahara, Masayuki  and  Ono, Hajime  and  Miyamoto, Edson T.},
  title     = {Reading-Time Annotations for "Balanced Corpus of Contemporary Written Japanese"},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {684--694},
  abstract  = {The Dundee Eyetracking Corpus contains
	 eyetracking data collected while native speakers of English and French read
	 newspaper editorial articles. 
	 Similar resources for other languages are still rare, especially for languages
	in which words are not overtly delimited with spaces.
	 This is a report on a project to build an eyetracking corpus for Japanese.
	 Measurements were collected while 24 native speakers of
	 Japanese read excerpts from the Balanced Corpus of Contemporary Written
	 Japanese.
	 Texts were presented with or without segmentation (i.e. with or without space
	 at the boundaries between bunsetsu segmentations) and with two types
	 of methodologies (eyetracking and self-paced reading presentation). 
	 Readers' background information, including vocabulary-size estimation
	 and Japanese reading-span scores, was also collected.
	 As an example of the  possible uses for the corpus, we also report analyses
	 investigating the phenomenon of anti-locality.},
  url       = {http://aclweb.org/anthology/C16-1066}
}

@InProceedings{nand-perera-kasture:2016:COLING,
  author    = {Nand, Parma  and  Perera, Rivindu  and  Kasture, Abhijeet},
  title     = {"How Bullying is this Message?": A Psychometric Thermometer for Bullying},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {695--706},
  abstract  = {Cyberbullying statistics are shocking: the number of affected young people is
	increasing dramatically with the affordability of mobile technology devices
	combined with a growing number of social networks. This paper proposes a
	framework to analyse Tweets with the goal to identify cyberharassment in social
	networks as an important step to protect people from cyberbullying. The
	proposed framework incorporates latent or hidden variables with supervised
	learning to determine potential bullying cases resembling short blogging type
	texts such as Tweets. It uses the LIWC2007 tool, which translates Tweet
	messages into 67 numeric values, representing 67 word categories. The output
	vectors are then used as features for four different classifiers implemented in
	Weka. Tests on all four classifiers delivered encouraging predictive capability
	of Tweet messages.  Overall it was found that the use of numeric psychometric
	values outperformed the same algorithms using both filtered and unfiltered
	words as features. The best performing algorithm was Random Forest with an
	F1-value of 0.947 using psychometric features compared to a value of 0.847 for
	the same algorithm using words as features.},
  url       = {http://aclweb.org/anthology/C16-1067}
}

@InProceedings{yatbaz-EtAl:2016:COLING,
  author    = {Yatbaz, Mehmet Ali  and  Cirik, Volkan  and  K\"{u}ntay, Aylin  and  Yuret, Deniz},
  title     = {Learning grammatical categories using paradigmatic representations: Substitute words for language acquisition},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {707--716},
  abstract  = {Learning syntactic categories is a fundamental task in language acquisition.
	Previous studies show that co-occurrence patterns of preceding and following
	words are essential to group words into categories.
	However, the neighboring words, or frames, are rarely repeated exactly in the
	data. This creates data sparsity and hampers learning for frame based models.
	In this work, we propose a paradigmatic representation of word context which
	uses probable substitutes instead of frames.
	Our experiments on child-directed speech show that models based on probable
	substitutes learn more accurate categories with fewer examples compared to
	models based on
	frames.},
  url       = {http://aclweb.org/anthology/C16-1068}
}

@InProceedings{paetzold-specia:2016:COLING1,
  author    = {Paetzold, Gustavo  and  Specia, Lucia},
  title     = {Understanding the Lexical Simplification Needs of Non-Native Speakers of English},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {717--727},
  abstract  = {We report three user studies in which the Lexical Simplification needs of
	non-native English speakers are investigated. Our analyses feature valuable new
	insight on the relationship between the non-natives' notion of complexity and
	various morphological, semantic and lexical word properties. Some of our
	findings contradict long-standing misconceptions about word simplicity. The
	data produced in our studies consists of 211,564 annotations made by 1,100
	volunteers, which we hope will guide forthcoming research on Text
	Simplification for non-native speakers of English.},
  url       = {http://aclweb.org/anthology/C16-1069}
}

@InProceedings{alam-EtAl:2016:COLING,
  author    = {Alam, Firoj  and  Chowdhury, Shammur Absar  and  Danieli, Morena  and  Riccardi, Giuseppe},
  title     = {How Interlocutors Coordinate with each other within Emotional Segments?},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {728--738},
  abstract  = {In this paper, we aim to investigate the coordination of interlocutors’ behavior
	in different emotional segments. Conversational coordination between the
	interlocutors is the tendency of speakers to predict and adjust to each other
	accordingly in an ongoing conversation. In order to find such coordination,
	we investigated 1) lexical similarities between the speakers in each emotional
	segment, 2) correlation between the interlocutors using psycholinguistic
	features, such as linguistic styles, psychological processes, and personal
	concerns, among others, and 3) the relation of interlocutors’ turn-taking
	behaviors such as competitiveness. To study the degree of coordination in
	different emotional segments, we conducted our experiments using real dyadic
	conversations collected from call centers, in which the agent’s emotional
	states include empathy and the customer’s emotional states include anger and
	frustration. Our findings suggest that the most coordination occurs between
	the interlocutors inside anger segments, whereas little coordination was
	observed when the agent was empathic, even though an increase in the amount of
	non-competitive overlaps was observed. We found no significant difference
	between anger and frustration segments in terms of turn-taking behaviors.
	However, the length of pause significantly decreases in the segment preceding
	anger, whereas it increases in the segment preceding frustration.},
  url       = {http://aclweb.org/anthology/C16-1070}
}

@InProceedings{bykh-meurers:2016:COLING,
  author    = {Bykh, Serhiy  and  Meurers, Detmar},
  title     = {Advancing Linguistic Features and Insights by Label-informed Feature Grouping: An Exploration in the Context of Native Language Identification},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {739--749},
  abstract  = {We propose a hierarchical clustering approach designed to group linguistic
	features for supervised machine learning that is inspired by variationist
	linguistics. The method makes it possible to abstract away from the individual
	feature occurrences by grouping features together that behave alike with
	respect to the target class, thus providing a new, more general perspective on
	the data. On the one hand, it reduces data sparsity, leading to quantitative
	performance gains. On the other, it supports the formation and evaluation of
	hypotheses about individual choices of linguistic structures. We explore the
	method using features based on verb subcategorization information and evaluate
	the approach in the context of the Native Language Identification (NLI) task.},
  url       = {http://aclweb.org/anthology/C16-1071}
}

@InProceedings{rubino-EtAl:2016:COLING,
  author    = {Rubino, Raphael  and  Degaetano-Ortlieb, Stefania  and  Teich, Elke  and  van Genabith, Josef},
  title     = {Modeling Diachronic Change in Scientific Writing with Information Density},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {750--761},
  abstract  = {Previous linguistic research on scientific writing has shown that language use
	in the scientific domain varies considerably in register and style over time.
	In this paper we investigate the introduction of information theory inspired
	features to study long term diachronic change on three levels: lexis,
	part-of-speech and syntax. Our approach is based on distinguishing between
	sentences from 19th and 20th century scientific abstracts using supervised
	classification models. To the best of our knowledge, the introduction of
	information theoretic features to this task is novel. We show that these
	features outperform more traditional features, such as token or character
	n-grams, while leading to more compact models. We present a detailed analysis
	of feature informativeness in order to gain a better understanding of
	diachronic change on different linguistic levels.},
  url       = {http://aclweb.org/anthology/C16-1072}
}

@InProceedings{hu-zhang-zheng:2016:COLING,
  author    = {Hu, Wenpeng  and  Zhang, Jiajun  and  Zheng, Nan},
  title     = {Different Contexts Lead to Different Word Embeddings},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {762--771},
  abstract  = {Recent work on learning word representations has been applied successfully to many
	NLP applications, such as sentiment analysis and question answering. However,
	most of these models assume a single vector per word type without considering
	polysemy and homonymy. In this paper, we present an extension to the CBOW model
	which not only improves the quality of embeddings but also makes embeddings
	suitable for polysemy. It differs from most of the related work in that it
	learns one semantic center embedding and one context bias instead of training
	multiple embeddings per word type. Different context leads to different bias
	which is defined as the weighted average embeddings of local context.
	Experimental results on similarity task and analogy task show that the word
	representations learned by the proposed method outperform the competitive
	baselines.},
  url       = {http://aclweb.org/anthology/C16-1073}
}

@InProceedings{agirrezabal-alegria-hulden:2016:COLING,
  author    = {Agirrezabal, Manex  and  Alegria, I\~{n}aki  and  Hulden, Mans},
  title     = {Machine Learning for Metrical Analysis of English Poetry},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {772--781},
  abstract  = {In this work we tackle the challenge of identifying rhythmic patterns in poetry
	written in English. Although poetry is a literary form that makes use of standard
	meters usually repeated among different authors, we will see in this paper how
	performing such analyses is a difficult task in machine learning due to the
	unexpected deviations from such standard patterns. After breaking down some
	examples of classical poetry, we apply a number of NLP techniques for the
	scansion of poetry, training and testing our systems against a human-annotated
	corpus. With these experiments, our purpose is to establish a baseline of
	automatic scansion of poetry using NLP tools in a straightforward manner and to
	raise awareness of the difficulties of this task.},
  url       = {http://aclweb.org/anthology/C16-1074}
}

@InProceedings{moore-EtAl:2016:COLING,
  author    = {Moore, Russell  and  Caines, Andrew  and  Graham, Calbert  and  Buttery, Paula},
  title     = {Automated speech-unit delimitation in spoken learner English},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {782--793},
  abstract  = {In order to apply computational linguistic analyses and pass information to
	downstream applications, transcriptions of speech obtained via automatic
	speech recognition (ASR) need to be divided into smaller meaningful units, in a
	task we refer to as ‘speech-unit (SU) delimitation’. We closely recreate
	the automatic delimitation system described by Lee and Glass (2012), ‘Sentence
	detection using multiple annotations’, Proceedings of INTERSPEECH,
	which combines a prosodic model, language model and speech-unit length model in
	log-linear fashion. Since state-of-the-art natural language processing (NLP)
	tools have been developed to deal with written text and its characteristic
	sentence-like units, SU delimitation helps bridge the gap between ASR and NLP,
	by normalising spoken data into a more canonical format. Previous work has
	focused on native speaker recordings; we test the system of Lee and Glass
	(2012) on non-native speaker (or ‘learner’) data, achieving performance
	above the state-of-the-art. We also consider alternative evaluation metrics
	which move away from the idea of a single ‘truth’ in SU delimitation, and
	frame this work in the context of downstream NLP applications.},
  url       = {http://aclweb.org/anthology/C16-1075}
}

@InProceedings{song-EtAl:2016:COLING1,
  author    = {Song, Wei  and  Liu, Tong  and  Fu, Ruiji  and  Liu, Lizhen  and  Wang, Hanshi  and  Liu, Ting},
  title     = {Learning to Identify Sentence Parallelism in Student Essays},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {794--803},
  abstract  = {Parallelism is an important rhetorical device. We propose a machine learning
	approach for automated sentence parallelism identification in student essays.
	We build an essay dataset with sentence level parallelism annotated. We derive
	features by combining generalized word alignment strategies and the alignment
	measures between word sequences. The experimental results show that sentence
	parallelism can be effectively identified with an F1 score of 82% at the pair-wise
	level and 72% at the parallelism chunk level. Based on this approach, we
	automatically identify sentence parallelism in more than 2000 student essays
	and study the correlation between the use of sentence parallelism and the types
	and quality of essays.},
  url       = {http://aclweb.org/anthology/C16-1076}
}

@InProceedings{basaldella-chiaradia-tasso:2016:COLING,
  author    = {Basaldella, Marco  and  Chiaradia, Giorgia  and  Tasso, Carlo},
  title     = {Evaluating anaphora and coreference resolution to improve automatic keyphrase extraction},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {804--814},
  abstract  = {In this paper we analyze the effectiveness of using linguistic knowledge from
	coreference and anaphora resolution for improving the performance for
	supervised keyphrase extraction. 
	In order to verify the impact of these features, we define a baseline 
	keyphrase extraction system and evaluate its performance on a standard dataset
	using different machine learning algorithms. 
	Then, we consider new sets of features by adding combinations of the linguistic
	features we propose and we evaluate the new performance of the system. We also
	use anaphora and coreference resolution to transform the documents, trying to
	simulate the cohesion process performed by the human mind. 
	We found that our approach has a slightly positive impact on the performance of
	automatic keyphrase extraction, in particular when considering the ranking of
	the results.},
  url       = {http://aclweb.org/anthology/C16-1077}
}

@InProceedings{ehrlemark-johansson-lyngfelt:2016:COLING,
  author    = {Ehrlemark, Anna  and  Johansson, Richard  and  Lyngfelt, Benjamin},
  title     = {Retrieving Occurrences of Grammatical Constructions},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {815--824},
  abstract  = {Finding authentic examples of grammatical constructions is central in
	constructionist approaches to linguistics, language processing, and second
	language learning. In this paper, we address this problem as an information
	retrieval (IR) task. To facilitate research in this area, we built a benchmark
	collection by annotating the occurrences of six constructions in a Swedish
	corpus.
	Furthermore, we implemented a simple and flexible retrieval system for finding
	construction occurrences, in which the user specifies a ranking function using
	lexical-semantic similarities (lexicon-based or distributional). The system was
	evaluated using standard IR metrics on the new benchmark, and we saw that
	lexical-semantic rerankers improve significantly over a purely
	surface-oriented system, but must be carefully tailored for each individual
	construction.},
  url       = {http://aclweb.org/anthology/C16-1078}
}

@InProceedings{felice-bryant-briscoe:2016:COLING,
  author    = {Felice, Mariano  and  Bryant, Christopher  and  Briscoe, Ted},
  title     = {Automatic Extraction of Learner Errors in ESL Sentences Using Linguistically Enhanced Alignments},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {825--835},
  abstract  = {We propose a new method of automatically extracting learner errors from
	parallel English as a Second Language (ESL) sentences in an effort to
	regularise annotation formats and reduce inconsistencies. Specifically, given
	an original and corrected sentence, our method first uses a linguistically
	enhanced alignment algorithm to determine the most likely mappings between
	tokens, and secondly employs a rule-based function to decide which alignments
	should be merged. Our method beats all previous approaches on the tested
	datasets, achieving state-of-the-art results for automatic error extraction.},
  url       = {http://aclweb.org/anthology/C16-1079}
}

@InProceedings{yamauchi-murawaki:2016:COLING,
  author    = {Yamauchi, Kenji  and  Murawaki, Yugo},
  title     = {Contrasting Vertical and Horizontal Transmission of Typological Features},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {836--846},
  abstract  = {Linguistic typology provides features that have a potential of uncovering deep
	phylogenetic relations among the world's languages.
	One of the key challenges in using typological features for phylogenetic
	inference is that horizontal (spatial) transmission obscures vertical
	(phylogenetic) signals.
	In this paper, we characterize typological features with respect to the
	relative strength of vertical and horizontal transmission.
	To do this, we first construct (1) a spatial neighbor graph of languages and
	(2) a phylogenetic neighbor graph by collapsing known language families.
	We then develop an autologistic model that predicts a feature's distribution
	from these two graphs.
	In the experiments, we managed to separate vertically and/or horizontally
	stable features from unstable ones, and the results are largely consistent with
	previous findings.},
  url       = {http://aclweb.org/anthology/C16-1080}
}

@InProceedings{mao-hulden:2016:COLING,
  author    = {Mao, Lingshuang  and  Hulden, Mans},
  title     = {How Regular is Japanese Loanword Adaptation? A Computational Study},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {847--856},
  abstract  = {The modifications that foreign loanwords undergo when adapted into Japanese
	have been the subject of much study in linguistics.  The scholarly interest of
	the topic can be attributed to the fact that Japanese loanwords undergo a
	complex series of phonological adaptations, something which has been puzzling
	scholars for decades. While previous studies of Japanese loanword accommodation
	have focused on specific phonological phenomena of limited scope, the current
	study leverages computational methods to provide a more complete description of
	all the sound changes that occur when adopting English words into Japanese.  To
	investigate this, we have developed a parallel corpus of 250 English
	transcriptions and their respective Japanese equivalents. These words were then
	used to develop a wide-coverage finite state transducer based phonological
	grammar that mimics the behavior of the Japanese adaptation process.  By
	developing rules with the goal of accounting completely for a large number of
	borrowings and analyzing forms mistakenly generated by the system, we discovered
	an internal inconsistency inside the loanword phonology of the Japanese
	language, something arguably underestimated by previous studies. The result of
	the investigation suggests that there are multiple 'dimensions' that shape the
	output form of the current Japanese loanwords. These dimensions include
	orthography, phonetics, and historical changes.},
  url       = {http://aclweb.org/anthology/C16-1081}
}

@InProceedings{inurrieta-EtAl:2016:COLING,
  author    = {I\~{n}urrieta, Uxoa  and  Diaz de Ilarraza, Arantza  and  Labaka, Gorka  and  Sarasola, Kepa  and  Aduriz, Itziar  and  Carroll, John},
  title     = {Using Linguistic Data for English and Spanish Verb-Noun Combination Identification},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {857--867},
  abstract  = {We present a linguistic analysis of a set of English and Spanish verb+noun
	combinations (VNCs), and a method to use this information to improve VNC
	identification. Firstly, a sample of frequent VNCs is analysed in depth and
	tagged along lexico-semantic and morphosyntactic dimensions, obtaining
	satisfactory inter-annotator agreement scores. Then, a VNC identification
	experiment is undertaken, where the analysed linguistic data is combined with
	chunking information and syntactic dependencies. A comparison between the
	results of the experiment and the results obtained by a basic detection method
	shows that VNC identification can be greatly improved by using linguistic
	information, as a large number of additional occurrences are detected with high
	precision.},
  url       = {http://aclweb.org/anthology/C16-1082}
}

@InProceedings{terkik-EtAl:2016:COLING,
  author    = {Terkik, Andamlak  and  Prud'hommeaux, Emily  and  Ovesdotter Alm, Cecilia  and  Homan, Christopher  and  Franklin, Scott},
  title     = {Analyzing Gender Bias in Student Evaluations},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {868--876},
  abstract  = {University students in the United States are routinely asked to provide
	feedback on the quality of the instruction they have received. Such feedback is
	widely used by university administrators to evaluate teaching ability, despite
	growing evidence that students assign lower numerical scores to women and
	people of color, regardless of the actual quality of instruction. In this
	paper, we analyze students’ written comments on faculty evaluation forms
	spanning eight years and five STEM disciplines in order to determine whether
	open-ended comments reflect these same biases. First, we apply sentiment
	analysis techniques to the corpus of comments to determine the overall affect
	of each comment. We then use this information, in combination with other
	features, to explore whether there is bias in how students describe their
	instructors. We show that while the gender of the evaluated instructor does not
	seem to affect students’ expressed level of overall satisfaction with their
	instruction, it does strongly influence the language that they use to describe
	their instructors and their experience in class.},
  url       = {http://aclweb.org/anthology/C16-1083}
}

@InProceedings{huynh-EtAl:2016:COLING,
  author    = {Huynh, Trung  and  He, Yulan  and  Willis, Alistair  and  Rueger, Stefan},
  title     = {Adverse Drug Reaction Classification With Deep Neural Networks},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {877--887},
  abstract  = {We study the problem of detecting sentences describing adverse drug reactions
	(ADRs) and frame the problem as binary classification. We investigate different
	neural network (NN) architectures for ADR classification. In particular, we
	propose two new neural network models, Convolutional Recurrent Neural Network
	(CRNN) by concatenating convolutional neural networks with recurrent neural
	networks, and Convolutional Neural Network with Attention (CNNA) by adding
	attention weights into convolutional neural networks. We evaluate various NN
	architectures on a Twitter dataset containing informal language and an Adverse
	Drug Effects (ADE) dataset constructed by sampling from MEDLINE case reports.
	Experimental results show that all the NN architectures considerably outperform
	traditional maximum entropy classifiers trained from n-grams with different
	weighting strategies on both datasets. On the Twitter dataset, all
	the NN architectures perform similarly. But on the ADE dataset, CNN performs
	better than other more complex CNN variants. Nevertheless, CNNA allows the
	visualisation of attention weights of words when making classification
	decisions and hence is more appropriate for the extraction of word subsequences
	describing ADRs.},
  url       = {http://aclweb.org/anthology/C16-1084}
}

@InProceedings{huang-shao-chen:2016:COLING,
  author    = {Huang, Hen-Hsen  and  Shao, Yen-Chi  and  Chen, Hsin-Hsi},
  title     = {Chinese Preposition Selection for Grammatical Error Diagnosis},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {888--899},
  abstract  = {Misuse of Chinese prepositions is one of the common word usage errors in
	grammatical error diagnosis. In this paper, we adopt the Chinese Gigaword
	corpus and HSK corpus as L1 and L2 corpora, respectively. We explore a gated
	recurrent neural network (GRU) model, and an ensemble of the GRU model and a maximum
	entropy language model (GRU-ME), to select the best preposition from 43
	candidates for each test sentence. The experimental results show the advantage
	of the GRU models over simple RNN and n-gram models. We further analyze the
	effectiveness of linguistic information such as word boundary and
	part-of-speech tag in this task.},
  url       = {http://aclweb.org/anthology/C16-1085}
}

@InProceedings{eskander-rambow-yang:2016:COLING,
  author    = {Eskander, Ramy  and  Rambow, Owen  and  Yang, Tianchun},
  title     = {Extending the Use of Adaptor Grammars for Unsupervised Morphological Segmentation of Unseen Languages},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {900--910},
  abstract  = {We investigate using Adaptor Grammars for unsupervised morphological
	  segmentation.  Using six development languages, we investigate in detail
	  different grammars, the use of morphological knowledge from outside
	  sources, and the use of a cascaded architecture.  Using cross-validation
	  on our development languages, we propose a system which is
	  language-independent.  We show that it outperforms two state-of-the-art
	  systems on 5 out of 6 languages.},
  url       = {http://aclweb.org/anthology/C16-1086}
}

@InProceedings{kuru-can-yuret:2016:COLING,
  author    = {Kuru, Onur  and  Can, Ozan Arkan  and  Yuret, Deniz},
  title     = {CharNER: Character-Level Named Entity Recognition},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {911--921},
  abstract  = {We describe and evaluate a character-level tagger for language-independent
	Named Entity Recognition (NER).
	Instead of words, a sentence is represented as a sequence of characters.
	The model consists of stacked bidirectional LSTMs which take characters as input and
	output tag probabilities for each character.  These probabilities are then
	converted to consistent word level named entity tags using a Viterbi decoder. 
	We are able to achieve close to state-of-the-art NER performance in seven
	languages with the same basic model using only labeled NER data and no
	hand-engineered features or other external resources like syntactic taggers or
	Gazetteers.},
  url       = {http://aclweb.org/anthology/C16-1087}
}

@InProceedings{hardmeier:2016:COLING,
  author    = {Hardmeier, Christian},
  title     = {A Neural Model for Part-of-Speech Tagging in Historical Texts},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {922--931},
  abstract  = {Historical texts are challenging for natural language processing because they
	differ linguistically from modern texts and because of their lack of
	orthographical and grammatical standardisation. We use a character-level neural
	network to build a part-of-speech (POS) tagger that can process historical data
	directly without requiring a separate spelling normalisation stage. Its
	performance in a Swedish verb identification and a German POS tagging task
	is similar to that of a two-stage model. We analyse the performance of this
	tagger and a more traditional baseline system, discuss some of the remaining
	problems for tagging historical data and suggest how the flexibility of our neural tagger
	could be exploited to address diachronic divergences in morphology and syntax
	in early modern Swedish with the help of data from closely related languages.},
  url       = {http://aclweb.org/anthology/C16-1088}
}

@InProceedings{wang-EtAl:2016:COLING2,
  author    = {Wang, Yunli  and  Jin, Yong  and  Zhu, Xiaodan  and  Goutte, Cyril},
  title     = {Extracting Discriminative Keyphrases with Learned Semantic Hierarchies},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {932--942},
  abstract  = {The goal of keyphrase extraction is to automatically identify the most salient
	phrases from documents. The technique has a wide range of applications such as
	rendering a quick glimpse of a document, or extracting key content for further
	use. While previous work often assumes keyphrases are a static property of a
	given document, in many applications, the appropriate set of keyphrases that
	should be extracted depends on the set of documents that are being considered
	together. In particular, good keyphrases should not only accurately describe
	the content of a document, but also reveal what discriminates it from the other
	documents.
	In this paper, we study this problem of extracting discriminative keyphrases.
	In particular, we propose to use the hierarchical semantic structure between
	candidate keyphrases to promote keyphrases that have the right level of
	specificity to clearly distinguish the target document from others. We show
	that such knowledge can be used to construct better discriminative keyphrase
	extraction systems that do not assume a static, fixed set of keyphrases for a
	document. We show how this helps identify key expertise of authors from their
	papers, as well as competencies covered by online courses within different
	domains.},
  url       = {http://aclweb.org/anthology/C16-1089}
}

@InProceedings{huang-EtAl:2016:COLING,
  author    = {Huang, Haoran  and  Zhang, Qi  and  Gong, Yeyun  and  Huang, Xuanjing},
  title     = {Hashtag Recommendation Using End-To-End Memory Networks with Hierarchical Attention},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {943--952},
  abstract  = {On microblogging services, people usually use hashtags to mark microblogs,
	which have a specific theme or content, making them easier for users to find.
	Hence, how to automatically recommend hashtags for microblogs has received much
	attention in recent years. Previous deep neural network-based hashtag
	recommendation approaches converted the task into a multi-class classification
	problem. However, most of these methods only took the microblog itself into
	consideration. Motivated by the intuition that the history of users should
	impact the recommendation procedure, in this work, we extend end-to-end memory
	networks to perform this task. We incorporate the histories of users into the
	external memory and introduce a hierarchical attention mechanism to select more
	appropriate histories. To train and evaluate the proposed method, we also
	construct a dataset based on microblogs collected from Twitter. Experimental
	results demonstrate that the proposed methods can significantly outperform
	state-of-the-art methods. By incorporating the hierarchical attention
	mechanism, the relative improvement in the proposed method over the
	state-of-the-art method is around 67.9\% in the F1-score.},
  url       = {http://aclweb.org/anthology/C16-1090}
}

@InProceedings{bhatia-lau-baldwin:2016:COLING,
  author    = {Bhatia, Shraey  and  Lau, Jey Han  and  Baldwin, Timothy},
  title     = {Automatic Labelling of Topics with Neural Embeddings},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {953--963},
  abstract  = {Topics generated by topic models are typically represented as lists of
	terms. To reduce the cognitive overhead of interpreting these topics for
	end-users, we propose labelling a topic with a succinct phrase that
	summarises its theme or idea. Using Wikipedia document titles as label
	candidates, we compute neural embeddings for documents and words to
	select the most relevant labels for topics. Compared to a
	state-of-the-art topic labelling system, our methodology is simpler,
	more efficient and finds better topic labels.},
  url       = {http://aclweb.org/anthology/C16-1091}
}

@InProceedings{shain-EtAl:2016:COLING,
  author    = {Shain, Cory  and  Bryce, William  and  Jin, Lifeng  and  Krakovna, Victoria  and  Doshi-Velez, Finale  and  Miller, Timothy  and  Schuler, William  and  Schwartz, Lane},
  title     = {Memory-Bounded Left-Corner Unsupervised Grammar Induction on Child-Directed Input},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {964--975},
  abstract  = {This paper presents a new memory-bounded left-corner parsing model for
	unsupervised raw-text syntax induction, using unsupervised hierarchical hidden
	Markov models (UHHMM). We deploy this algorithm to shed light on the extent to
	which human language learners can discover hierarchical syntax through
	distributional statistics alone, by modeling two widely-accepted features of
	human language acquisition and sentence processing that have not been
	simultaneously modeled by any existing grammar induction algorithm: (1) a
	left-corner parsing strategy and (2) limited working memory capacity. To model
	realistic input to human language learners, we evaluate our system on a corpus
	of child-directed speech rather than typical newswire corpora. Results beat or
	closely match those of three competing systems.},
  url       = {http://aclweb.org/anthology/C16-1092}
}

@InProceedings{herbelot-kochmar:2016:COLING,
  author    = {Herbelot, Aur\'{e}lie  and  Kochmar, Ekaterina},
  title     = {‘Calling on the classical phone’: a distributional model of adjective-noun errors in learners’ English},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {976--986},
  abstract  = {In this paper we discuss three key points related to error detection (ED) in
	learners’ English. We focus on content word ED as one of the most challenging
	tasks in this area, illustrating our claims on adjective--noun (AN)
	combinations. In particular, we (1) investigate the role of context in
	accurately capturing semantic anomalies and implement a system based on
	distributional topic coherence, which achieves state-of-the-art accuracy on a
	standard test set; (2) thoroughly investigate our system’s performance across
	individual adjective classes, concluding that a class-dependent approach is
	beneficial to the task; (3) discuss the data size bottleneck in this area, and
	highlight the challenges of automatic error generation for content words.},
  url       = {http://aclweb.org/anthology/C16-1093}
}

@InProceedings{todirascu-EtAl:2016:COLING,
  author    = {Todirascu, Amalia  and  Francois, Thomas  and  Bernhard, Delphine  and  Gala, Nuria  and  Ligozat, Anne-Laure},
  title     = {Are Cohesive Features Relevant for Text Readability Evaluation?},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {987--997},
  abstract  = {This paper investigates the effectiveness of 65 cohesion-based variables that
	are commonly used in the literature as predictive features to assess text
	readability. We evaluate the efficiency of these variables across narrative and
	informative texts intended for an audience of L2 French learners. In our
	experiments, we use a French corpus that has been both manually and
	automatically annotated with regard to co-reference and anaphoric chains. The
	efficiency of the 65 variables for readability is analyzed through a
	correlational analysis and some modelling experiments.},
  url       = {http://aclweb.org/anthology/C16-1094}
}

@InProceedings{littell-EtAl:2016:COLING,
  author    = {Littell, Patrick  and  Goyal, Kartik  and  Mortensen, David R.  and  Little, Alexa  and  Dyer, Chris  and  Levin, Lori},
  title     = {Named Entity Recognition for Linguistic Rapid Response in Low-Resource Languages: Sorani Kurdish and Tajik},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {998--1006},
  abstract  = {This paper describes our construction of named-entity recognition (NER) systems
	in two Western Iranian languages, Sorani Kurdish and Tajik, as a part of a
	pilot study of "Linguistic Rapid Response" to potential emergency humanitarian
	relief situations.  In the absence of large annotated corpora, parallel
	corpora, treebanks, bilingual lexica, etc., we found the following to be
	effective:  exploiting distributional regularities in monolingual data,
	projecting information across closely related languages, and utilizing human
	linguist judgments. We show promising results on both a four-month exercise in
	Sorani and a two-day exercise in Tajik, achieved with minimal annotation costs.},
  url       = {http://aclweb.org/anthology/C16-1095}
}

@InProceedings{exner-klang-nugues:2016:COLING,
  author    = {Exner, Peter  and  Klang, Marcus  and  Nugues, Pierre},
  title     = {Multilingual Supervision of Semantic Annotation},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1007--1017},
  abstract  = {In this paper, we investigate the annotation projection of semantic units in a
	practical setting. Previous approaches have focused on using parallel corpora
	for semantic transfer. We evaluate an alternative approach using loosely
	parallel corpora that does not require the corpora to be exact translations of
	each other. We developed a method that transfers semantic annotations from one
	language to another using sentences aligned by entities, and we extended it to
	include alignments by entity-like linguistic units. We conducted our
	experiments on a large scale using the English, Swedish, and French language
	editions of Wikipedia. Our results show that the annotation projection using
	entities in combination with loosely parallel corpora provides a viable
	approach to extending previous attempts. In addition, it allows the generation
	of proposition banks upon which semantic parsers can be trained.},
  url       = {http://aclweb.org/anthology/C16-1096}
}

@InProceedings{rama:2016:COLING,
  author    = {Rama, Taraka},
  title     = {Siamese Convolutional Networks for Cognate Identification},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1018--1027},
  abstract  = {In this paper, we present phoneme level Siamese convolutional networks for the
	task of pair-wise cognate identification. We represent a word as a
	two-dimensional matrix and employ a siamese convolutional network for learning
	deep representations. We present siamese architectures that jointly learn
	phoneme level feature representations and language relatedness from raw words
	for cognate identification. Compared to previous work, we train and test on
	larger and more realistic datasets and show that siamese architectures
	consistently perform better than a traditional linear classifier approach.},
  url       = {http://aclweb.org/anthology/C16-1097}
}

@InProceedings{he-li-zhuge:2016:COLING,
  author    = {He, Lei  and  Li, Wei  and  Zhuge, Hai},
  title     = {Exploring Differential Topic Models for Comparative Summarization of Scientific Papers},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1028--1038},
  abstract  = {This paper investigates differential topic models (dTM) for summarizing the
	differences among document groups. Starting from a simple probabilistic
	generative model, we propose dTM-SAGE that explicitly models the deviations on
	group-specific word distributions to indicate how words are used
	differentially across different document groups from a background word
	distribution. It is more effective to capture unique characteristics for
	comparing document groups. To generate dTM-based comparative summaries, we
	propose two sentence scoring methods for measuring the sentence discriminative
	capacity. Experimental results on a scientific papers dataset show that our
	dTM-based comparative summarization methods significantly outperform the
	generic baselines and the state-of-the-art comparative summarization methods
	under ROUGE metrics.},
  url       = {http://aclweb.org/anthology/C16-1098}
}

@InProceedings{benikova-EtAl:2016:COLING,
  author    = {Benikova, Darina  and  Mieskes, Margot  and  Meyer, Christian M.  and  Gurevych, Iryna},
  title     = {Bridging the gap between extractive and abstractive summaries: Creation and evaluation of coherent extracts from heterogeneous sources},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1039--1050},
  abstract  = {Coherent extracts are a novel type of summary combining the advantages of
	manually created abstractive summaries, which are fluent but difficult to
	evaluate, and low-quality automatically created extractive summaries, which
	lack coherence and structure. We use a corpus of heterogeneous documents to
	address the issue that information seekers usually face -- a variety of
	different types of information sources. We directly extract information from
	these, but minimally redact and meaningfully order it to form a coherent text.
	Our qualitative and quantitative evaluations show that quantitative results are
	not sufficient to judge the quality of a summary and that other quality
	criteria, such as coherence, should also be taken into account. We find that
	our manually created corpus is of high quality and that it has the potential to
	bridge the gap between reference corpora of abstracts and automatic methods
	producing extracts. Our corpus is available to the research community for
	further development.},
  url       = {http://aclweb.org/anthology/C16-1099}
}

@InProceedings{wang-EtAl:2016:COLING3,
  author    = {Wang, Zhe  and  He, Wei  and  Wu, Hua  and  Wu, Haiyang  and  Li, Wei  and  Wang, Haifeng  and  Chen, Enhong},
  title     = {Chinese Poetry Generation with Planning based Neural Network},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1051--1060},
  abstract  = {Chinese poetry generation is a very challenging task in natural language
	processing. In this paper, we propose a novel two-stage poetry generating
	method which first plans the sub-topics of the poem according to the user's
	writing intent, and then generates each line of the poem sequentially, using a
	modified recurrent neural network encoder-decoder framework. The proposed
	planning-based method can ensure that the generated poem is coherent and
	semantically consistent with the user's intent. A comprehensive evaluation with
	human judgments demonstrates that our proposed approach outperforms the
	state-of-the-art poetry generating methods and that the poem quality is somewhat
	comparable to that of human poets.},
  url       = {http://aclweb.org/anthology/C16-1100}
}

@InProceedings{chenal-cheung:2016:COLING,
  author    = {Chenal, Victor  and  Cheung, Jackie Chi Kit},
  title     = {Predicting sentential semantic compatibility for aggregation in text-to-text generation},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1061--1070},
  abstract  = {We examine the task of aggregation in the context of text-to-text generation.
	We introduce a new aggregation task which frames the process as grouping input
	sentence fragments into clusters that are to be expressed as a single output
	sentence. We extract datasets for this task from a corpus using an automatic
	extraction process. Based on the results of a user study, we develop two
	gold-standard clusterings and corresponding evaluation methods for each
	dataset. We present a hierarchical clustering framework for predicting
	aggregation decisions on this task, which outperforms several baselines and can
	serve as a reference in future work.},
  url       = {http://aclweb.org/anthology/C16-1101}
}

@InProceedings{zopf-lozamencia-furnkranz:2016:COLING,
  author    = {Zopf, Markus  and  Loza Menc\'{i}a, Eneldo  and  F\"{u}rnkranz, Johannes},
  title     = {Sequential Clustering and Contextual Importance Measures for Incremental Update Summarization},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1071--1082},
  abstract  = {Unexpected events such as accidents, natural disasters and terrorist attacks
	represent an information situation where it is crucial to give users access to
	important and non-redundant information as early as possible. Incremental
	update summarization (IUS) aims at summarizing events which develop over time.
	In this paper, we propose a combination of sequential clustering and contextual
	importance measures to identify important sentences in a stream of documents in
	a timely manner. Sequential clustering is used to cluster similar sentences.
	The created clusters are scored by a contextual importance measure which
	identifies important information as well as redundant information. Experiments
	on the TREC Temporal Summarization 2015 shared task dataset show that our
	system achieves superior results compared to the best participating systems.},
  url       = {http://aclweb.org/anthology/C16-1102}
}

@InProceedings{goyal-dymetman-gaussier:2016:COLING,
  author    = {Goyal, Raghav  and  Dymetman, Marc  and  Gaussier, Eric},
  title     = {Natural Language Generation through Character-based RNNs with Finite-state Prior Knowledge},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1083--1092},
  abstract  = {Recently Wen et al. (2015) have proposed a Recurrent Neural Network (RNN)
	approach to the generation of utterances from dialog acts, and shown that
	although their model requires less effort to develop than a rule-based system,
	it is able to improve certain aspects of the utterances, in particular their
	naturalness. However their system employs generation at the word-level, which
	requires one to pre-process the data by substituting named entities with
	placeholders. This pre-processing prevents the model from handling some
	contextual effects and from managing multiple occurrences of the same
	attribute.
	Our approach uses a character-level model, which unlike the word-level model
	makes it possible to learn to ``copy'' information from the dialog act to the
	target without having to pre-process the input. In order to avoid generating
	non-words and inventing information not present in the input, we propose a
	method for incorporating prior knowledge into the RNN in the form of a weighted
	finite-state automaton over character sequences. Automatic and human
	evaluations show improved performance over baselines on several evaluation
	criteria.},
  url       = {http://aclweb.org/anthology/C16-1103}
}

@InProceedings{chachra-EtAl:2016:COLING,
  author    = {Chachra, Suchet  and  Ben Abacha, Asma  and  Shooshan, Sonya  and  Rodriguez, Laritza  and  Demner-Fushman, Dina},
  title     = {A Hybrid Approach to Generation of Missing Abstracts in Biomedical Literature},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1093--1100},
  abstract  = {Readers usually rely on abstracts to identify relevant medical information from
	scientific articles. Abstracts are also essential to advanced information
	retrieval methods. More than 50 thousand scientific publications in PubMed lack
	author-generated abstracts, and the relevancy judgements for these papers have
	to be based on their titles alone. In this paper, we propose a hybrid
	summarization technique that aims to select the most pertinent sentences from
	articles to generate an extractive summary in lieu of a missing abstract. We
	combine i) health outcome detection, ii) keyphrase extraction, and iii) textual
	entailment recognition between sentences. We evaluate our hybrid approach and
	analyze the improvements of multi-factor summarization over techniques that
	rely on a single method, using a collection of 295 manually generated reference
	summaries. The obtained results show that the hybrid approach outperforms the
	baseline techniques with an improvement of 13% in recall and 4% in F1 score.},
  url       = {http://aclweb.org/anthology/C16-1104}
}

@InProceedings{lampouras-vlachos:2016:COLING,
  author    = {Lampouras, Gerasimos  and  Vlachos, Andreas},
  title     = {Imitation learning for language generation from unaligned data},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1101--1112},
  abstract  = {Natural language generation (NLG) is the task of generating natural language
	from a meaning representation. Current rule-based approaches require
	domain-specific and manually constructed linguistic resources, while most
	machine-learning based approaches rely on aligned training data and/or phrase
	templates. The latter are needed to restrict the search space for the
	structured prediction task defined by the unaligned datasets. In this work we
	propose the use of imitation learning for structured prediction which learns an
	incremental model that handles the large search space by avoiding explicit
	enumeration of the outputs. We focus on the Locally Optimal Learning to Search
	framework which allows us to train against non-decomposable loss functions such
	as the BLEU or ROUGE scores while not assuming gold standard alignments. We
	evaluate our approach on three datasets using both automatic measures and human
	judgements and achieve results comparable to the state-of-the-art approaches
	developed for each of them.},
  url       = {http://aclweb.org/anthology/C16-1105}
}

@InProceedings{yu-EtAl:2016:COLING,
  author    = {Yu, Naitong  and  Huang, Minlie  and  Shi, Yuanyuan  and  Zhu, Xiaoyan},
  title     = {Product Review Summarization by Exploiting Phrase Properties},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1113--1124},
  abstract  = {We propose a phrase-based approach for generating product review summaries. The
	main idea of our method is to leverage phrase properties to choose a subset of
	optimal phrases for generating the final summary. Specifically, we exploit two
	phrase properties, popularity and specificity. Popularity describes how popular
	the phrase is in the original reviews. Specificity describes how descriptive a
	phrase is in comparison to generic comments. We formalize the phrase selection
	procedure as an optimization problem and solve it using integer linear
	programming (ILP). An aspect-based bigram language model is used for generating
	the final summary with the selected phrases. Experiments show that our
	summarizer outperforms the other baselines.},
  url       = {http://aclweb.org/anthology/C16-1106}
}

@InProceedings{araki-EtAl:2016:COLING,
  author    = {Araki, Jun  and  Rajagopal, Dheeraj  and  Sankaranarayanan, Sreecharan  and  Holm, Susan  and  Yamakawa, Yukari  and  Mitamura, Teruko},
  title     = {Generating Questions and Multiple-Choice Answers using Semantic Analysis of Texts},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1125--1136},
  abstract  = {We present a novel approach to automated question generation that improves upon
	prior work both from a technology perspective and from an assessment
	perspective. Our system is aimed at engaging language learners by generating
	multiple-choice questions which utilize specific inference steps over multiple
	sentences, namely coreference resolution and paraphrase detection. The system
	also generates correct answers and semantically-motivated phrase-level
	distractors as answer choices. Evaluation by human annotators indicates that
	our approach requires a larger number of inference steps, which necessitate
	deeper semantic understanding of texts than a traditional single-sentence
	approach.},
  url       = {http://aclweb.org/anthology/C16-1107}
}

@InProceedings{marques-beuls:2016:COLING,
  author    = {Marques, Tania  and  Beuls, Katrien},
  title     = {Evaluation Strategies for Computational Construction Grammars},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1137--1146},
  abstract  = {Despite the growing number of Computational Construction Grammar
	implementations, the field is still lacking evaluation methods to compare
	grammar fragments across different platforms. Moreover, the hand-crafted nature
	of most grammars requires profiling tools to understand the complex
	interactions between constructions of different types. This paper presents a
	number of evaluation measures, partially based on existing measures in the
	field of semantic parsing, that are especially relevant for reversible grammar
	formalisms. The measures are tested on a grammar fragment for European
	Portuguese clitic placement that is currently under development.},
  url       = {http://aclweb.org/anthology/C16-1108}
}

@InProceedings{kajiwara-komachi:2016:COLING,
  author    = {Kajiwara, Tomoyuki  and  Komachi, Mamoru},
  title     = {Building a Monolingual Parallel Corpus for Text Simplification Using Sentence Similarity Based on Alignment between Word Embeddings},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1147--1158},
  abstract  = {Methods for text simplification using the framework of statistical machine
	translation have been extensively studied in recent years.
	However, building the monolingual parallel corpus necessary for training the
	model requires costly human annotation.
	Monolingual parallel corpora for text simplification have therefore been built
	only for a limited number of languages, such as English and Portuguese.
	To obviate the need for human annotation, we propose an unsupervised method
	that automatically builds the monolingual parallel corpus for text
	simplification using sentence similarity based on word embeddings.
	For any sentence pair comprising a complex sentence and its simple counterpart,
	we employ a many-to-one method of aligning each word in the complex sentence
	with the most similar word in the simple sentence and compute sentence
	similarity by averaging these word similarities. 
	The experimental results demonstrate the excellent performance of the proposed
	method in a monolingual parallel corpus construction task for English text
	simplification.
	The results also demonstrate that text simplification systems based on the framework
	of statistical machine translation achieve higher accuracy when trained on the corpus
	built by the proposed method than when trained on existing corpora.},
  url       = {http://aclweb.org/anthology/C16-1109}
}

@InProceedings{servan-EtAl:2016:COLING,
  author    = {Servan, Christophe  and  Berard, Alexandre  and  Elloumi, Zied  and  Blanchon, Herv\'{e}  and  Besacier, Laurent},
  title     = {Word2Vec vs DBnary: Augmenting METEOR using Vector Representations or Lexical Resources?},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1159--1168},
  abstract  = {This paper presents an approach combining lexico-semantic resources and
	distributed representations of words applied to the evaluation in machine
	translation (MT). This study is made through the enrichment of a well-known MT
	evaluation metric: METEOR. METEOR enables an approximate match (synonymy or
	morphological similarity) between an automatic and a reference translation. Our
	experiments are made in the framework of the Metrics task of WMT 2014. We show
	that distributed representations are a good alternative to
	lexico-semantic resources for MT evaluation and they can even bring interesting
	additional information. The augmented versions of METEOR, using vector
	representations, are made available on our Github page.},
  url       = {http://aclweb.org/anthology/C16-1110}
}

@InProceedings{derczynski-bontcheva-roberts:2016:COLING,
  author    = {Derczynski, Leon  and  Bontcheva, Kalina  and  Roberts, Ian},
  title     = {Broad Twitter Corpus: A Diverse Named Entity Recognition Resource},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1169--1179},
  abstract  = {One of the main obstacles, hampering method development and comparative
	evaluation of named entity recognition in social media, is the lack of a
	sizeable, diverse, high quality annotated corpus, analogous to the CoNLL'2003
	news dataset. For instance, the biggest Ritter tweet corpus is only 45,000
	tokens -- a mere 15% of the size of CoNLL'2003. Another major shortcoming is the
	lack of temporal, geographic, and author diversity. This paper introduces the
	Broad Twitter Corpus (BTC), which is not only significantly bigger, but sampled
	across different regions, temporal periods, and types of Twitter users. The
	gold-standard named entity annotations are made by a combination of NLP experts
	and crowd workers, which enables us to harness crowd recall while maintaining
	high quality. We also measure the entity drift observed in our dataset (i.e.
	how entity representation varies over time), and compare to newswire.  The
	corpus is released openly, including source text and intermediate annotations.},
  url       = {http://aclweb.org/anthology/C16-1111}
}

@InProceedings{ilievski-postma-vossen:2016:COLING,
  author    = {Ilievski, Filip  and  Postma, Marten  and  Vossen, Piek},
  title     = {Semantic overfitting: what 'world' do we consider when evaluating disambiguation of text?},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1180--1191},
  abstract  = {Semantic text processing faces the challenge of defining the relation between
	lexical expressions and the world to which they make reference within a period
	of time. It is unclear whether the current test sets used to evaluate
	disambiguation tasks are representative for the full complexity considering
	this time-anchored relation, resulting in semantic overfitting to a specific
	period and the frequent phenomena within. We conceptualize and formalize a set
	of metrics which evaluate this complexity of datasets. We provide evidence for
	their applicability on five different disambiguation tasks. To challenge
	semantic overfitting of disambiguation systems, we propose a time-based,
	metric-aware method for developing datasets in a systematic and semi-automated
	manner, as well as an event-based QA task.},
  url       = {http://aclweb.org/anthology/C16-1112}
}

@InProceedings{suzuki-takatsuka:2016:COLING,
  author    = {Suzuki, Shoko  and  Takatsuka, Hiromichi},
  title     = {Extraction of Keywords of Novelties From Patent Claims},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1192--1200},
  abstract  = {There is a growing need for patent analysis using Natural Language Processing
	(NLP)-based approaches. Although NLP-based approaches can extract various kinds of
	information from patents, very few approaches have been proposed to extract those
	parts that inventors regard as novel or as having an inventive step compared to
	all existing prior work. Extracting such parts is difficult even for human
	annotators, except for well-trained experts, which causes many difficulties in
	analyzing patents. We propose a novel approach to automatically extract keywords
	that relate to novelties or inventive steps from patent claims using the structure
	of the claims. In addition, we also propose a new framework for evaluating our
	approach. The experiments show that our approach significantly outperforms
	existing keyword extraction methods in many technical fields.},
  url       = {http://aclweb.org/anthology/C16-1113}
}

@InProceedings{hsi-EtAl:2016:COLING,
  author    = {Hsi, Andrew  and  Yang, Yiming  and  Carbonell, Jaime  and  Xu, Ruochen},
  title     = {Leveraging Multilingual Training for Limited Resource Event Extraction},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1201--1210},
  abstract  = {Event extraction has become one of the most important topics in information
	extraction, but to date, there is very limited work on leveraging cross-lingual
	training to boost performance. We propose a new event extraction approach that
	trains on multiple languages using a combination of both language-dependent and
	language-independent features, with particular focus on the case where target
	domain training data is of very limited size. We show empirically that
	multilingual training can boost performance for the tasks of event trigger
	extraction and event argument extraction on the Chinese ACE 2005 dataset.},
  url       = {http://aclweb.org/anthology/C16-1114}
}

@InProceedings{albadrashiny-diab:2016:COLING,
  author    = {Al-Badrashiny, Mohamed  and  Diab, Mona},
  title     = {LILI: A Simple Language Independent Approach for Language Identification},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1211--1219},
  abstract  = {We introduce a generic Language Independent Framework for Linguistic Code
	Switch Point Detection. The system uses character-level 5-gram and word-level
	unigram language models to train a conditional random field (CRF) model for
	classifying input words into various languages. We test our proposed framework
	and compare it to state-of-the-art published systems on standard data sets
	from several language pairs: English-Spanish, Nepali-English, English-Hindi,
	Arabizi (Arabic written using the Latin/Roman script)-English, Arabic-Engari
	(English written using Arabic script), Modern Standard Arabic (MSA)-Egyptian,
	Levantine-MSA, Gulf-MSA, one more English-Spanish, and one more MSA-EGY. The
	overall weighted average F-scores for these language pairs are 96.4%, 97.3%,
	98.0%, 97.0%, 98.9%, 86.3%, 88.2%, 90.6%, 95.2%, and 85.0%, respectively. The
	results show that our approach, despite its simplicity, either outperforms or
	performs at comparable levels to state-of-the-art published systems.},
  url       = {http://aclweb.org/anthology/C16-1115}
}

@InProceedings{tayyarmadabushi-lee:2016:COLING,
  author    = {Tayyar Madabushi, Harish  and  Lee, Mark},
  title     = {High Accuracy Rule-based Question Classification using Question Syntax and Semantics},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1220--1230},
  abstract  = {We present in this paper a purely rule-based system for Question Classification
	which we divide into two parts: The first is the extraction of relevant words
	from a question by use of its structure, and the second is the classification
	of questions based on rules that associate these words with Concepts. We achieve
	an accuracy of 97.2%, close to a 6-point improvement over the previous state of
	the art of 91.6%. Additionally, we believe that machine learning algorithms can
	be applied on top of this method to further improve accuracy.},
  url       = {http://aclweb.org/anthology/C16-1116}
}

@InProceedings{xiang-EtAl:2016:COLING,
  author    = {Xiang, Yang  and  Zhou, Xiaoqiang  and  Chen, Qingcai  and  Zheng, Zhihui  and  Tang, Buzhou  and  Wang, Xiaolong  and  Qin, Yang},
  title     = {Incorporating Label Dependency for Answer Quality Tagging in Community Question Answering via CNN-LSTM-CRF},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1231--1241},
  abstract  = {In community question answering (cQA), the quality of answers is determined by
	the matching degree between question-answer pairs and the correlation among the
	answers. In this paper, we show that the dependency between the answer quality
	labels also plays a pivotal role. To validate the effectiveness of label
	dependency, we propose two neural network-based models, with different
	combination modes of Convolutional Neural Networks, Long Short-Term Memory and
	Conditional Random Fields. Extensive experiments are conducted on the dataset
	released by the SemEval-2015 cQA shared task. The first model is a stacked
	ensemble of the networks. It achieves 58.96% on macro-averaged F1, which
	improves the state-of-the-art neural network-based method by 2.82% and
	outperforms the Top-1 system in the shared task by 1.77%. The second is a
	simple attention-based model whose input is the connection of the question and
	its corresponding answers. It produces promising results with 58.29% on overall
	F1 and gains the best performance on the Good and Bad categories.},
  url       = {http://aclweb.org/anthology/C16-1117}
}

@InProceedings{liebeskind-hacohenkerner:2016:COLING,
  author    = {Liebeskind, Chaya  and  HaCohen-Kerner, Yaakov},
  title     = {Semantically Motivated Hebrew Verb-Noun Multi-Word Expressions Identification},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1242--1253},
  abstract  = {Identification of Multi-Word Expressions (MWEs) lies at the heart of many
	natural language processing applications.
	In this research, we deal with a particular type of Hebrew MWEs, Verb-Noun
	MWEs (VN-MWEs), which combine a verb and a noun with or without other words.
	Most prior work on MWE classification focused on linguistic and statistical
	information. In this paper, we claim that it is essential to utilize semantic
	information. To this end, we propose a semantically motivated indicator for
	classifying VN-MWEs and define features that are related to various semantic
	spaces and combine them in a supervised classification framework.
	We empirically demonstrate that our semantic feature set yields better
	performance than the common linguistic and statistical feature sets and that
	combining semantic features contributes to the VN-MWEs identification task.},
  url       = {http://aclweb.org/anthology/C16-1118}
}

@InProceedings{xiao-liu:2016:COLING,
  author    = {Xiao, Minguang  and  Liu, Cong},
  title     = {Semantic Relation Classification via Hierarchical Recurrent Neural Network with Attention},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1254--1263},
  abstract  = {Semantic relation classification remains a challenge in natural language
	processing. In this paper, we introduce a hierarchical recurrent neural network
	that is capable of extracting information from raw sentences for relation
	classification. Our model has several distinctive features: (1) Each sentence
	is divided into three context subsequences according to two annotated nominals,
	which allows the model to encode each context subsequence independently so as
	to selectively focus on the important context information; (2) The
	hierarchical model consists of two recurrent neural networks (RNNs): the first
	one learns context representations of the three context subsequences
	respectively, and the second one computes semantic composition of these three
	representations and produces a sentence representation for the relationship
	classification of the two nominals; (3) The attention mechanism is adopted in
	both RNNs to encourage the model to concentrate on the important information
	when learning the sentence representations. Experimental results on the
	SemEval-2010 Task 8 dataset demonstrate that our model is comparable to the
	state-of-the-art without using any hand-crafted features.},
  url       = {http://aclweb.org/anthology/C16-1119}
}

@InProceedings{guo-EtAl:2016:COLING2,
  author    = {Guo, Jiang  and  Che, Wanxiang  and  Wang, Haifeng  and  Liu, Ting  and  Xu, Jun},
  title     = {A Unified Architecture for Semantic Role Labeling and Relation Classification},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1264--1274},
  abstract  = {This paper describes a unified neural architecture for identifying and
	classifying multi-typed semantic relations between words in a sentence.
	We investigate two typical and well-studied tasks: semantic role labeling (SRL)
	which identifies the relations between predicates and arguments, and relation
	classification (RC) which focuses on the relation between two entities or
	nominals.
	While mostly studied separately in prior work, we show that the two tasks can
	be effectively connected and modeled using a general architecture.
	Experiments on CoNLL-2009 benchmark datasets show that our SRL models
	significantly outperform state-of-the-art approaches.
	Our RC models also yield competitive performance with the best published
	records.
	Furthermore, we show that the two tasks can be trained jointly with multi-task
	learning, resulting in additive significant improvements for SRL.},
  url       = {http://aclweb.org/anthology/C16-1120}
}

@InProceedings{do-bethard-moens:2016:COLING,
  author    = {Do, Quynh Ngoc Thi  and  Bethard, Steven  and  Moens, Marie-Francine},
  title     = {Facing the most difficult case of Semantic Role Labeling: A collaboration of word embeddings and co-training},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1275--1284},
  abstract  = {We present a successful collaboration of word embeddings and co-training to
	tackle the most difficult test case of semantic role labeling: predicting
	out-of-domain and unseen semantic frames. Despite the fact that co-training is
	a successful traditional semi-supervised method, its application in SRL is very
	limited, especially when a huge amount of labeled data is available. In this
	work, co-training is used together with word embeddings to improve the
	performance of a system trained on a large training dataset. We also introduce
	a semantic role labeling system with a simple learning architecture and
	effective inference that is easily adaptable to semi-supervised settings with
	new training data and/or new features. On the out-of-domain testing set of the
	standard benchmark CoNLL 2009 data our simple approach achieves high
	performance and improves state-of-the-art results.},
  url       = {http://aclweb.org/anthology/C16-1121}
}

@InProceedings{pado-EtAl:2016:COLING,
  author    = {Pad\'{o}, Sebastian  and  Herbelot, Aur\'{e}lie  and  Kisselew, Max  and  \v{S}najder, Jan},
  title     = {Predictability of Distributional Semantics in Derivational Word Formation},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1285--1296},
  abstract  = {Compositional distributional semantic models (CDSMs) have successfully been
	applied to the task of predicting the meaning of a range of linguistic
	constructions. Their performance on semi- compositional word formation process
	of (morphological) derivation, however, has been extremely variable, with no
	large-scale empirical investigation to date. This paper fills that gap,
	performing an analysis of CDSM predictions on a large dataset (over 30,000
	German derivationally related word pairs). We use linear regression models to
	analyze CDSM performance and obtain insights into the linguistic factors that
	influence how predictable the distributional context of a derived word is going
	to be. We identify various such factors, notably part of speech, argument
	structure, and semantic regularity.},
  url       = {http://aclweb.org/anthology/C16-1122}
}

@InProceedings{ohoran-EtAl:2016:COLING,
  author    = {O'Horan, Helen  and  Berzak, Yevgeni  and  Vulic, Ivan  and  Reichart, Roi  and  Korhonen, Anna},
  title     = {Survey on the Use of Typological Information in Natural Language Processing},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1297--1308},
  abstract  = {In recent years linguistic typologies, which classify the world’s languages
	according to their functional and structural properties, have been widely used
	to support multilingual NLP. While the growing importance of typologies in
	supporting multilingual tasks has been recognised, no systematic survey of
	existing typological resources and their use in NLP has been published. This
	paper provides such a survey as well as discussion which we hope will both
	inform and inspire future work in the area.},
  url       = {http://aclweb.org/anthology/C16-1123}
}

@InProceedings{gelderloos-chrupala:2016:COLING,
  author    = {Gelderloos, Lieke  and  Chrupa{\l}a, Grzegorz},
  title     = {From phonemes to images: levels of representation in a recurrent neural model of visually-grounded language learning},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1309--1319},
  abstract  = {We present a model of visually-grounded language learning based on stacked
	gated recurrent neural networks which learns to predict visual features given
	an image description in the form of a sequence of phonemes. The learning task
	resembles that faced by human language learners who need to discover both
	structure and meaning from noisy and ambiguous data across modalities. We show
	that our model indeed learns to predict features of the visual context given
	phonetically transcribed image descriptions, and show that it represents
	linguistic information in a hierarchy of levels: lower layers in the stack are
	comparatively more sensitive to form, whereas higher layers are more sensitive
	to meaning.},
  url       = {http://aclweb.org/anthology/C16-1124}
}

@InProceedings{vaidya-agarwal-palmer:2016:COLING,
  author    = {Vaidya, Ashwini  and  Agarwal, Sumeet  and  Palmer, Martha},
  title     = {Linguistic features for Hindi light verb construction identification},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1320--1329},
  abstract  = {Light verb constructions (LVC) in Hindi are highly productive. Distinguishing
	a case such as nirnay lenaa ‘decision take; decide’ from an ordinary
	verb-argument combination such as kaagaz lenaa ‘paper take; take (a)
	paper’ has been shown to aid NLP applications such as parsing (Begum et
	al., 2011) and machine translation (Pal et al., 2011). In this paper, we
	propose an LVC identification system using language-specific features for Hindi
	which shows an improvement over previous work (Begum et al., 2011). To build our
	system, we carry out a linguistic analysis of Hindi LVCs using Hindi Treebank
	annotations and propose two new features that are aimed at capturing the
	diversity of Hindi LVCs in the corpus. We find that our model performs
	robustly across a diverse range of LVCs and our results underscore the
	importance of semantic features, which is in keeping with the findings for
	English. Our error analysis also demonstrates that our classifier can be used
	to further refine LVC annotations in the Hindi Treebank and make them more
	consistent across the board.},
  url       = {http://aclweb.org/anthology/C16-1125}
}

@InProceedings{barrett-keller-sogaard:2016:COLING,
  author    = {Barrett, Maria  and  Keller, Frank  and  S{\o}gaard, Anders},
  title     = {Cross-lingual Transfer of Correlations between Parts of Speech and Gaze Features},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1330--1339},
  abstract  = {Several recent studies have shown that eye movements during reading provide
	information about grammatical and syntactic processing, which can assist the
	induction of NLP models. All these studies have been limited to English,
	however. This study shows that gaze and part of speech (PoS) correlations
	largely transfer across English and French. This means that we can replicate
	previous studies on gaze-based PoS tagging for French, but also that we can use
	English gaze data to assist the induction of French NLP models.},
  url       = {http://aclweb.org/anthology/C16-1126}
}

@InProceedings{wang-mi-ittycheriah:2016:COLING,
  author    = {Wang, Zhiguo  and  Mi, Haitao  and  Ittycheriah, Abraham},
  title     = {Sentence Similarity Learning by Lexical Decomposition and Composition},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1340--1349},
  abstract  = {Most conventional sentence similarity methods focus only on the similar parts of
	two input sentences and simply ignore the dissimilar parts, which usually also
	give us clues about the semantics of the sentences. In this work, we propose a
	model that takes into account both the similarities and dissimilarities by
	decomposing and composing lexical semantics over sentences. The model
	represents each word as a vector and calculates a semantic matching vector for
	each word based on all words in the other sentence. Then, each word vector is
	decomposed into a similar component and a dissimilar component based on the
	semantic matching vector. After this, a two-channel CNN model is employed to
	capture features by composing the similar and dissimilar components. Finally, a
	similarity score is estimated over the composed feature vectors. Experimental
	results show that our model achieves state-of-the-art performance on the answer
	sentence selection task and a comparable result on the paraphrase
	identification task.},
  url       = {http://aclweb.org/anthology/C16-1127}
}

@InProceedings{wang-he:2016:COLING,
  author    = {Wang, Chengyu  and  He, Xiaofeng},
  title     = {Chinese Hypernym-Hyponym Extraction from User Generated Categories},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1350--1361},
  abstract  = {Hypernym-hyponym (“is-a”) relations are key components in taxonomies,
	object hierarchies and knowledge graphs. While there is abundant research on
	is-a relation extraction in English, it still remains a challenge to identify
	such relations from Chinese knowledge sources accurately due to the flexibility
	of language expression. In this paper, we introduce a weakly supervised
	framework to extract Chinese is-a relations from user generated categories. It
	employs piecewise linear projection models trained on a Chinese taxonomy and an
	iterative learning algorithm to update models incrementally. A pattern-based
	relation selection method is proposed to prevent “semantic drift” in the
	learning process using bi-criteria optimization. Experimental results
	illustrate that the proposed approach outperforms state-of-the-art methods.},
  url       = {http://aclweb.org/anthology/C16-1128}
}

@InProceedings{emms-jayapal:2016:COLING,
  author    = {Emms, Martin  and  Jayapal, Arun kumar},
  title     = {Dynamic Generative model for Diachronic Sense Emergence Detection},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1362--1373},
  abstract  = {As time passes words can acquire meanings they did not previously have, such as
	the ‘twitter post’ usage of ‘tweet’. We address how this can be
	detected from time-stamped raw text. We propose a generative model with senses
	dependent on times and context words dependent on senses but otherwise eternal,
	and a Gibbs sampler for estimation. We obtain promising parameter estimates for
	positive (resp. negative) cases of known sense emergence (resp. non-emergence)
	and adapt the ‘pseudo-word’ technique (Schutze, 1992) to give a novel
	further evaluation via ‘pseudo-neologisms’. The question of ground-truth is
	also addressed and a technique proposed to locate an emergence date for
	evaluation purposes.},
  url       = {http://aclweb.org/anthology/C16-1129}
}

@InProceedings{yuan-EtAl:2016:COLING,
  author    = {Yuan, Dayu  and  Richardson, Julian  and  Doherty, Ryan  and  Evans, Colin  and  Altendorf, Eric},
  title     = {Semi-supervised Word Sense Disambiguation with Neural Models},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1374--1385},
  abstract  = {Determining the intended sense of words in text -- word sense disambiguation
	(WSD) -- is a long-standing problem in natural language processing. Recently,
	researchers have shown promising results using word vectors extracted from a
	neural network language model as features in WSD algorithms. However, a simple
	average or concatenation of word vectors for each word in a text loses the
	sequential and syntactic information of the text. 
	In this paper, we study WSD with a sequence learning neural net, LSTM, to
	better capture the sequential and syntactic patterns of the text. To alleviate
	the lack of training data in all-words WSD, we employ the same LSTM in a
	semi-supervised label propagation classifier. We demonstrate state-of-the-art
	results, especially on verbs.},
  url       = {http://aclweb.org/anthology/C16-1130}
}

@InProceedings{zhang-EtAl:2016:COLING2,
  author    = {Zhang, Jian  and  Wu, Xiaofeng  and  Way, Andy  and  Liu, Qun},
  title     = {Fast Gated Neural Domain Adaptation: Language Model as a Case Study},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1386--1397},
  abstract  = {Neural network training has been shown to be advantageous in many natural
	language processing applications, such as language modelling or machine
	translation. In this paper, we describe in detail a novel domain adaptation
	mechanism in neural network training. Instead of learning and adapting the
	neural network on millions of training sentences -- which can be very
	time-consuming or even infeasible in some cases -- we design a domain
	adaptation gating mechanism which can be used in recurrent neural networks and
	quickly learn the out-of-domain knowledge directly from the word vector
	representations with little speed overhead. In our experiments, we use the
	recurrent neural network language model (LM) as a case study. We show that the
	neural LM perplexity can be reduced by 7.395 and 12.011 using the proposed
	domain adaptation mechanism on the Penn Treebank and News data, respectively.
	Furthermore, we show that using the domain-adapted neural LM to re-rank the
	statistical machine translation n-best list on the French-to-English language
	pair can significantly improve translation quality.},
  url       = {http://aclweb.org/anthology/C16-1131}
}

@InProceedings{guzman-EtAl:2016:COLING,
  author    = {Guzm\'{a}n, Francisco  and  Bouamor, Houda  and  Baly, Ramy  and  Habash, Nizar},
  title     = {Machine Translation Evaluation for Arabic using Morphologically-enriched Embeddings},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1398--1408},
  abstract  = {Evaluation of machine translation (MT) into morphologically rich languages
	(MRL) has not been
	well studied despite posing many challenges. In this paper, we explore the use
	of embeddings obtained from different levels of lexical and morpho-syntactic
	linguistic analysis and show that they improve MT evaluation into an MRL.
	Specifically we report on Arabic, a language with complex and rich morphology.
	Our results show that using a neural-network model with different input
	representations produces results that clearly outperform the state-of-the-art
	for MT evaluation into Arabic, with almost a 75% increase in correlation with
	human judgments on the pairwise MT evaluation quality task. More importantly, we
	demonstrate the usefulness of morpho-syntactic representations to model
	sentence similarity for MT evaluation and address complex linguistic phenomena
	of Arabic.},
  url       = {http://aclweb.org/anthology/C16-1132}
}

@InProceedings{garmash-monz:2016:COLING,
  author    = {Garmash, Ekaterina  and  Monz, Christof},
  title     = {Ensemble Learning for Multi-Source Neural Machine Translation},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1409--1418},
  abstract  = {In this paper we describe and evaluate methods to perform ensemble prediction
	in neural machine translation (NMT). We compare two methods of ensemble set
	induction: sampling parameter initializations for an NMT system, which is a
	relatively established method in NMT (Sutskever et al., 2014), and NMT systems
	translating from different source languages into the same target language,
	i.e., multi-source ensembles, a method recently introduced by Firat et al.
	(2016). We are motivated by the observation that for different language pairs
	systems make different types of mistakes. We propose several methods with
	different degrees of parameterization to combine individual predictions of NMT
	systems so that they mutually compensate for each other’s mistakes and
	improve overall performance. We find that the biggest improvements can be
	obtained from a context-dependent weighting scheme for multi-source ensembles.
	This result offers stronger support for the linguistic motivation of using
	multi-source ensembles than previous approaches. Evaluation is carried out for
	German and French into English translation. The best multi-source ensemble
	method achieves an improvement of up to 2.2 BLEU points over the strongest
	single-source ensemble baseline, and a 2 BLEU improvement over a multi-source
	ensemble baseline.},
  url       = {http://aclweb.org/anthology/C16-1133}
}

@InProceedings{oda-EtAl:2016:COLING,
  author    = {Oda, Yusuke  and  Kudo, Taku  and  Nakagawa, Tetsuji  and  Watanabe, Taro},
  title     = {Phrase-based Machine Translation using Multiple Preordering Candidates},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1419--1428},
  abstract  = {In this paper, we propose a new decoding method for phrase-based statistical
	machine translation which directly uses multiple preordering candidates as a
	graph structure.
	Compared with previous phrase-based decoding methods,
	our method is based on a simple left-to-right dynamic programming in which no
	decoding-time reordering is performed.
	As a result, its runtime is very fast and implementing the algorithm becomes
	easy.
	Our system does not depend on specific preordering methods as long as they
	output multiple preordering candidates,
	and it is trivial to incorporate existing preordering methods into our system.
	In our experiments on translating 11 diverse languages into English,
	the proposed method outperforms a conventional phrase-based decoder in terms of
	translation quality with comparable or faster decoding time.},
  url       = {http://aclweb.org/anthology/C16-1134}
}

@InProceedings{suggu-EtAl:2016:COLING,
  author    = {Suggu, Sai Praneeth  and  Naga Goutham, Kushwanth  and  Chinnakotla, Manoj K.  and  Shrivastava, Manish},
  title     = {Hand in Glove: Deep Feature Fusion Network Architectures for Answer Quality Prediction in Community Question Answering},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1429--1440},
  abstract  = {Community Question Answering (cQA) forums have become a popular medium for
	soliciting direct answers to specific questions of users from experts or other
	experienced users on a given topic. However, for a given question, users
	sometimes have to sift through a large number of low-quality or irrelevant
	answers to find out the answer which satisfies their information need. To
	alleviate this, the problem of Answer Quality Prediction (AQP) aims to predict
	the quality of an answer posted in response to a forum question. Current AQP
	systems either learn models using - a) various hand-crafted features (HCF) or
	b) Deep Learning (DL) techniques which automatically learn the required feature
	representations. 
	In this paper, we propose a novel approach for AQP known as - “Deep Feature
	Fusion Network (DFFN)” which combines the advantages of both hand-crafted
	features and deep learning based systems. Given a question-answer pair along
	with its metadata, the DFFN architecture independently - a) learns features
	from the Deep Neural Network (DNN) and b) computes hand-crafted features using
	various external resources and then combines them using a fully connected
	neural network trained to predict the final answer quality. DFFN is end-to-end
	differentiable and trained as a single system. We propose two different DFFN
	architectures which vary mainly in the way they model the input question/answer
	pair - DFFN-CNN uses a Convolutional Neural Network (CNN) and DFFN-BLNA uses a
	Bi-directional LSTM with Neural Attention (BLNA). Both these proposed variants
	of DFFN (DFFN-CNN and DFFN-BLNA) achieve state-of-the-art performance on the
	standard SemEval-2015 and SemEval-2016 benchmark datasets and outperform
	baseline approaches which individually employ either HCF- or DL-based techniques
	alone.},
  url       = {http://aclweb.org/anthology/C16-1135}
}

@InProceedings{li-EtAl:2016:COLING3,
  author    = {Li, Fangyuan  and  Huang, Ruihong  and  Xiong, Deyi  and  Zhang, Min},
  title     = {Learning Event Expressions via Bilingual Structure Projection},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1441--1450},
  abstract  = {Identifying events of a specific type is a challenging task as events in texts
	are described in numerous and diverse ways. Aiming to resolve high complexities
	of event descriptions, previous work (Huang and Riloff, 2013) proposes
	multi-faceted event recognition and a bootstrapping method to automatically
	acquire both event facet phrases and event expressions from unannotated texts.
	However, to ensure high quality of learned phrases, this method is constrained
	to only learn phrases that match certain syntactic structures. In this paper,
	we propose a bilingual structure projection algorithm that explores linguistic
	divergences between two languages (Chinese and English) and mines new phrases
	with new syntactic structures, which have been ignored in the previous work.
	Experiments show that our approach can successfully find novel event phrases
	and structures, e.g., phrases headed by nouns. Furthermore, the newly mined
	phrases are capable of recognizing additional event descriptions and increasing
	the recall of event recognition.},
  url       = {http://aclweb.org/anthology/C16-1136}
}

@InProceedings{li-EtAl:2016:COLING4,
  author    = {Li, Peifeng  and  Zhu, Qiaoming  and  Zhou, Guodong  and  Wang, Hongling},
  title     = {Global Inference to Chinese Temporal Relation Extraction},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1451--1460},
  abstract  = {Previous studies on temporal relation extraction focus on mining sentence-level
	information or enforcing coherence on different temporal relation types among
	various event mentions in the same sentence or neighboring sentences, largely
	ignoring those discourse-level temporal relations in nonadjacent sentences. In
	this paper, we propose a discourse-level global inference model to mine those
	temporal relations between event mentions at the document level, especially in
	nonadjacent sentences. Moreover, we provide various kinds of discourse-level
	constraints, which are derived from event semantics, to further improve our global
	inference model. Evaluation on a Chinese corpus justifies the effectiveness of
	our discourse-level global inference model over two strong baselines.},
  url       = {http://aclweb.org/anthology/C16-1137}
}

@InProceedings{xu-EtAl:2016:COLING1,
  author    = {Xu, Yan  and  Jia, Ran  and  Mou, Lili  and  Li, Ge  and  Chen, Yunchuan  and  Lu, Yangyang  and  Jin, Zhi},
  title     = {Improved relation classification by deep recurrent neural networks with data augmentation},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1461--1470},
  abstract  = {Nowadays, neural networks play an important role in the task of relation
	classification. By designing different neural architectures, researchers have
	improved the performance to a large extent in comparison with traditional
	methods. However, existing neural networks for relation classification are
	usually of shallow architectures (e.g., one-layer convolutional neural networks
	or recurrent networks). They may fail to explore the potential representation
	space in different abstraction levels. In this paper, we propose deep recurrent
	neural networks (DRNNs) for relation classification to tackle this challenge.
	Further, we propose a data augmentation method by leveraging the directionality
	of relations. We evaluate our DRNNs on the SemEval-2010 Task~8, and achieve an
	F1-score of 86.1%, outperforming previous state-of-the-art recorded results.},
  url       = {http://aclweb.org/anthology/C16-1138}
}

@InProceedings{jiang-EtAl:2016:COLING1,
  author    = {Jiang, Xiaotian  and  Wang, Quan  and  Li, Peng  and  Wang, Bin},
  title     = {Relation Extraction with Multi-instance Multi-label Convolutional Neural Networks},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1471--1480},
  abstract  = {Distant supervision is an efficient approach that automatically generates
	labeled data for relation extraction (RE). Traditional distantly supervised RE
	systems rely heavily on handcrafted features, and hence suffer from error
	propagation. Recently, a neural network architecture has been proposed to
	automatically extract features for relation classification. However, this
	approach follows the traditional expressed-at-least-once assumption, and fails
	to make full use of information across different sentences. Moreover, it
	ignores the fact that there can be multiple relations holding between the same
	entity pair. In this paper, we propose a multi-instance multi-label
	convolutional neural network for distantly supervised RE. It first relaxes the
	expressed-at-least-once assumption, and employs cross-sentence max-pooling so
	as to enable information sharing across different sentences. Then it handles
	overlapping relations by multi-label learning with a neural network classifier.
	Experimental results show that our approach performs significantly and
	consistently better than state-of-the-art methods.},
  url       = {http://aclweb.org/anthology/C16-1139}
}

@InProceedings{glaser-kuhn:2016:COLING,
  author    = {Glaser, Andrea  and  Kuhn, Jonas},
  title     = {Named Entity Disambiguation for little known referents: a topic-based approach},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1481--1492},
  abstract  = {We propose an approach to Named Entity Disambiguation that avoids a problem of
	standard work on the task (likewise affecting fully supervised, weakly
	supervised, or distantly supervised machine learning techniques): the treatment
	of name mentions referring to people with no (or very little) coverage in the
	textual training data is systematically incorrect. We propose to indirectly
	take into account the property information for the "non-prominent" name
	bearers, such as nationality and profession (e.g., for a Canadian law professor
	named Michael Jackson, with no Wikipedia article, it is very hard to obtain
	reliable textual training data). The target property information for the
	entities is directly available from name authority files, or inferrable, e.g.,
	from listings of sportspeople etc. Our proposed approach employs topic modeling
	to exploit textual training data based on entities sharing the relevant
	properties. In experiments with a pilot implementation of the general approach,
	we show that the approach does indeed work well for name/referent pairs with
	limited textual coverage in the training data.},
  url       = {http://aclweb.org/anthology/C16-1140}
}

@InProceedings{perezbeltrachini-sayed-gardent:2016:COLING,
  author    = {Perez-Beltrachini, Laura  and  Sayed, Rania  and  Gardent, Claire},
  title     = {Building RDF Content for Data-to-Text Generation},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1493--1502},
  abstract  = {In Natural Language Generation (NLG), one important limitation is the lack of
	common benchmarks on which to train, evaluate and compare data-to-text
	generators. In this paper, we make one step in that direction and introduce a
	method for automatically creating an arbitrarily large repertoire of data units
	that could serve as input for generation.  Using both automated metrics and a
	human evaluation, we show that the data units produced by our method are both
	diverse and coherent.},
  url       = {http://aclweb.org/anthology/C16-1141}
}

@InProceedings{ive-yvon:2016:COLING,
  author    = {Ive, Julia  and  Yvon, Fran\c{c}ois},
  title     = {Parallel Sentence Compression},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1503--1513},
  abstract  = {Sentence compression is a way to perform text simplification and is usually
	handled in a monolingual setting. In this paper, we study ways to extend
	sentence compression in a bilingual context, where the goal is to obtain
	parallel compressions of parallel sentences. This can be beneficial for a
	series of multilingual natural language processing (NLP) tasks. We compare two
	ways to take bilingual information into account when compressing parallel
	sentences. Their efficiency is contrasted on a parallel corpus of News
	articles.},
  url       = {http://aclweb.org/anthology/C16-1142}
}

@InProceedings{ma-deng-yang:2016:COLING,
  author    = {Ma, Shulei  and  Deng, Zhi-Hong  and  Yang, Yunlun},
  title     = {An Unsupervised Multi-Document Summarization Framework Based on Neural Document Model},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1514--1523},
  abstract  = {In the age of information explosion, multi-document summarization is attracting
	particular attention for the ability to help people get the main ideas in a
	short time. Traditional extractive methods simply treat the document set as a
	group of sentences while ignoring the global semantics of the documents.
	Meanwhile, neural document model is effective on representing the semantic
	content of documents in low-dimensional vectors. In this paper, we propose a
	document-level reconstruction framework named DocRebuild, which reconstructs
	the documents with summary sentences through a neural document model and
	selects summary sentences to minimize the reconstruction error. We also apply
	two strategies, sentence filtering and beam search, to improve the performance
	of our method. Experimental results on the benchmark datasets DUC 2006 and
	DUC 2007 show that DocRebuild is effective and outperforms the state-of-the-art
	unsupervised algorithms.},
  url       = {http://aclweb.org/anthology/C16-1143}
}

@InProceedings{schwenger-EtAl:2016:COLING,
  author    = {Schwenger, Maximilian  and  Torralba, Alvaro  and  Hoffmann, Joerg  and  Howcroft, David M.  and  Demberg, Vera},
  title     = {From OpenCCG to AI Planning: Detecting Infeasible Edges in Sentence Generation},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1524--1534},
  abstract  = {The search space in grammar-based natural language generation tasks
	can get very large, which is particularly problematic when generating
	long utterances or paragraphs. Using surface realization with OpenCCG
	as an example, we show that we can effectively detect partial
	solutions (edges) which cannot ultimately be part of a complete
	sentence because of their syntactic category. Formulating the
	completion of an edge into a sentence as finding a solution path in a
	large state-transition system, we demonstrate a connection to AI
	Planning which is concerned with this kind of problem. We design a
	compilation from OpenCCG into AI Planning allowing the detection of
	infeasible edges via AI Planning dead-end detection methods (proving
	the absence of a solution to the compilation). Our experiments show
	that this can filter out large fractions of infeasible edges in, and
	thus benefit the performance of, complex realization processes.},
  url       = {http://aclweb.org/anthology/C16-1144}
}

@InProceedings{zopf-peyrard-ecklekohler:2016:COLING,
  author    = {Zopf, Markus  and  Peyrard, Maxime  and  Eckle-Kohler, Judith},
  title     = {The Next Step for Multi-Document Summarization: A Heterogeneous Multi-Genre Corpus Built with a Novel Construction Approach},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1535--1545},
  abstract  = {Research in multi-document summarization has focused on newswire corpora since
	the early beginnings. However, the newswire genre provides genre-specific
	features such as sentence position which are easy to exploit in summarization
	systems. Such easy to exploit genre-specific features are available in other
	genres as well. We therefore present the new hMDS corpus for multi-document
	summarization, which contains heterogeneous source documents from multiple text
	genres, as well as summaries with different lengths. For the construction of
	the corpus, we developed a novel construction approach which is suited to build
	large and heterogeneous summarization corpora with little effort. The method
	reverses the usual process of writing summaries for given source documents: it
	combines already available summaries with appropriate source documents. In a
	detailed analysis, we show that our new corpus is significantly different from
	the homogeneous corpora commonly used, and that it is heterogeneous along
	several dimensions. Our experimental evaluation using well-known
	state-of-the-art summarization systems shows that our corpus poses new
	challenges in the field of multi-document summarization. Last but not least, we
	make our corpus publicly available to the research community at the corpus web
	page https://github.com/AIPHES/hMDS.},
  url       = {http://aclweb.org/anthology/C16-1145}
}

@InProceedings{saeidi-EtAl:2016:COLING,
  author    = {Saeidi, Marzieh  and  Bouchard, Guillaume  and  Liakata, Maria  and  Riedel, Sebastian},
  title     = {SentiHood: Targeted Aspect Based Sentiment Analysis Dataset for Urban Neighbourhoods},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1546--1556},
  abstract  = {In this paper, we introduce the task of targeted aspect-based sentiment
	analysis.  The goal is to extract fine-grained information with respect to
	entities mentioned in user comments. This work extends both aspect-based
	sentiment analysis -- that assumes a single entity per document -- and
	targeted sentiment analysis -- that assumes a single sentiment towards a
	target entity. In particular, we identify the sentiment towards each aspect of
	one or more entities. As a testbed for this task, we introduce the SentiHood
	dataset, extracted from a question answering (QA) platform where urban
	neighbourhoods are discussed by users. In this context, units of text often
	mention several aspects of one or more neighbourhoods. This is the first time
	that a generic social media platform, i.e., QA, is used for fine-grained opinion
	mining. Text coming from QA platforms is far less constrained compared to
	text from the review-specific platforms on which current datasets are based. We
	develop several strong baselines, relying on logistic regression and
	state-of-the-art recurrent neural networks.},
  url       = {http://aclweb.org/anthology/C16-1146}
}

@InProceedings{jovanoski-pachovski-nakov:2016:COLING,
  author    = {Jovanoski, Dame  and  Pachovski, Veno  and  Nakov, Preslav},
  title     = {On the Impact of Seed Words on Sentiment Polarity Lexicon Induction},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1557--1567},
  abstract  = {Sentiment polarity lexicons are key resources for sentiment analysis, and
	researchers have invested a lot of efforts in their manual creation. However,
	there has been a recent shift towards automatically extracted lexicons, which
	are orders of magnitude larger and perform much better. These lexicons are
	typically mined using bootstrapping, starting from very few seed words whose
	polarity is given, e.g., 50-60 words, and sometimes even just 5-6.
	Here we demonstrate that much higher-quality lexicons can be built by starting
	with hundreds of words and phrases as seeds, especially when they are
	in-domain. Thus, we combine (i) mid-sized high-quality manually crafted
	lexicons as seeds and (ii) bootstrapping, in order to build large-scale
	lexicons.},
  url       = {http://aclweb.org/anthology/C16-1147}
}

@InProceedings{somasundaran-EtAl:2016:COLING,
  author    = {Somasundaran, Swapna  and  Riordan, Brian  and  Gyawali, Binod  and  Yoon, Su-Youn},
  title     = {Evaluating Argumentative and Narrative Essays using Graphs},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1568--1578},
  abstract  = {This work investigates whether the development of ideas in writing can be
	captured by graph properties derived from the text. Focusing on student essays,
	we represent the essay as a graph, and encode a variety of graph properties
	including PageRank as features for modeling essay scores related to quality of
	development. We demonstrate that our approach improves on a state-of-the-art
	system on the task of holistic scoring of persuasive essays and on the task of
	scoring narrative essays along the development dimension.},
  url       = {http://aclweb.org/anthology/C16-1148}
}

@InProceedings{agrawal-an:2016:COLING,
  author    = {Agrawal, Ameeta  and  An, Aijun},
  title     = {Selective Co-occurrences for Word-Emotion Association},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1579--1590},
  abstract  = {Emotion classification from text typically requires some degree of word-emotion
	association, either gathered from pre-existing emotion lexicons or calculated
	using some measure of semantic relatedness. Most emotion lexicons contain a
	fixed number of emotion categories and provide a rather limited coverage.
	Current measures of computing semantic relatedness, on the other hand, do not
	adapt well to the specific task of word-emotion association and therefore,
	yield average results. In this work, we propose an unsupervised method of
	learning word-emotion association from large text corpora, called Selective
	Co-occurrences (SECO), by leveraging the property of mutual exclusivity
	generally exhibited by emotions. Extensive evaluation, using just one seed word
	per emotion category, indicates the effectiveness of the proposed approach over
	three emotion lexicons and two state-of-the-art models of word embeddings on
	three datasets from different domains.},
  url       = {http://aclweb.org/anthology/C16-1149}
}

@InProceedings{li-EtAl:2016:COLING5,
  author    = {Li, Bofang  and  Zhao, Zhe  and  Liu, Tao  and  Wang, Puwei  and  Du, Xiaoyong},
  title     = {Weighted Neural Bag-of-n-grams Model: New Baselines for Text Classification},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1591--1600},
  abstract  = {NBSVM is one of the most popular methods for text classification and has been
	widely used as baselines for various text representation approaches. It uses
	Naive Bayes (NB) feature to weight sparse bag-of-n-grams representation. N-gram
	captures word order in short context and NB feature assigns more weights to
	those important words. However, NBSVM suffers from sparsity problem and is
	reported to be exceeded by newly proposed distributed (dense) text
	representations learned by neural networks. In this paper, we transfer the
	n-grams and NB weighting to neural models. We train n-gram embeddings and use
	NB weighting to guide the neural models to focus on important words. In fact,
	our methods can be viewed as distributed (dense) counterparts of sparse
	bag-of-n-grams in NBSVM. We discover that n-grams and NB weighting are also
	effective in distributed representations. As a result, our models achieve new
	strong baselines on 9 text classification datasets, e.g., on the IMDB dataset, we
	reach an accuracy of 93.5\%, which exceeds previous state-of-the-art
	results obtained by deep neural models. All source codes are publicly available
	at https://github.com/zhezhaoa/neural\_BOW\_toolkit.},
  url       = {http://aclweb.org/anthology/C16-1150}
}

@InProceedings{poria-EtAl:2016:COLING,
  author    = {Poria, Soujanya  and  Cambria, Erik  and  Hazarika, Devamanyu  and  Vij, Prateek},
  title     = {A Deeper Look into Sarcastic Tweets Using Deep Convolutional Neural Networks},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1601--1612},
  abstract  = {Sarcasm detection is a key task for many natural language processing tasks. In
	sentiment analysis, for example, sarcasm can flip the polarity of an
	“apparently positive” sentence and, hence, negatively affect polarity
	detection performance. To date, most approaches to sarcasm detection have
	treated the task primarily as a text categorization problem. Sarcasm, however,
	can be expressed in very subtle ways and requires a deeper understanding of
	natural language that standard text categorization techniques cannot grasp. In
	this work, we develop models based on a pre-trained convolutional neural
	network for extracting sentiment, emotion and personality features for sarcasm
	detection. Such features, along with the network’s baseline features, allow
	the proposed models to outperform the state of the art on benchmark datasets.
	We also address the often ignored generalizability issue of classifying data
	that have not been seen by the models at the learning phase.},
  url       = {http://aclweb.org/anthology/C16-1151}
}

@InProceedings{barnes-lambert-badia:2016:COLING,
  author    = {Barnes, Jeremy  and  Lambert, Patrik  and  Badia, Toni},
  title     = {Exploring Distributional Representations and Machine Translation for Aspect-based Cross-lingual Sentiment Classification.},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1613--1623},
  abstract  = {Cross-lingual sentiment classification (CLSC) seeks to use resources from a
	source language in order to detect sentiment and classify text in a target
	language. Almost all research into CLSC has been carried out at sentence and
	document level, although this level of granularity is often less useful. This
	paper explores methods for performing aspect-based cross-lingual sentiment
	classification (aspect-based CLSC) for under-resourced languages. Given the
	limited nature of parallel data for many languages, we would like to make the
	most of this resource for our task.  We compare zero-shot learning, bilingual
	word embeddings, stacked denoising autoencoder representations and machine
	translation techniques for aspect-based CLSC. Each of these approaches requires
	differing amounts of parallel data. We show that models based on distributed
	semantics can achieve comparable results to machine translation on aspect-based
	CLSC and give an analysis of the errors found for each method.},
  url       = {http://aclweb.org/anthology/C16-1152}
}

@InProceedings{wang-EtAl:2016:COLING4,
  author    = {Wang, Zhongqing  and  Zhang, Yue  and  Lee, Sophia  and  Li, Shoushan  and  Zhou, Guodong},
  title     = {A Bilingual Attention Network for Code-switched Emotion Prediction},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1624--1634},
  abstract  = {Emotions in code-switching text can be expressed in either monolingual or
	bilingual forms. However, relatively little research has focused on
	code-switching text. In this paper, we propose a Bilingual Attention Network
	(BAN) model to aggregate the monolingual and bilingual informative words to
	form attention vectors from the document representation, and to integrate these
	attention vectors to predict the emotion. The experiments demonstrate the
	effectiveness of the proposed model. Visualization of the attention layers illustrates that the
	model selects qualitatively informative words.},
  url       = {http://aclweb.org/anthology/C16-1153}
}

@InProceedings{chen-ku:2016:COLING,
  author    = {Chen, Wei-Fan  and  Ku, Lun-Wei},
  title     = {UTCNN: a Deep Learning Model of Stance Classification on Social Media Text},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1635--1645},
  abstract  = {Most neural network models for document classification on social media focus on
	text information to the neglect of other information on these platforms. In
	this paper, we classify post stance on social media channels and develop UTCNN,
	a neural network model that incorporates user tastes, topic tastes, and user
	comments on posts. UTCNN not only works on social media texts, but also
	analyzes texts in forums and message boards. Experiments performed on Chinese
	Facebook data and English online debate forum data show that UTCNN achieves a
	0.755 macro average f-score for supportive, neutral, and unsupportive stance
	classes on Facebook data, which is significantly better than models in which
	either user, topic, or comment information is withheld. This model design
	greatly mitigates the lack of data for the minor class. In addition, UTCNN
	yields a 0.842 accuracy on English online debate forum data, which also
	significantly outperforms results from previous work, showing that UTCNN
	performs well regardless of language or platform.},
  url       = {http://aclweb.org/anthology/C16-1154}
}

@InProceedings{cornudella-poibeau-vantrijp:2016:COLING,
  author    = {Cornudella, Miquel  and  Poibeau, Thierry  and  van Trijp, Remi},
  title     = {The Role of Intrinsic Motivation in Artificial Language Emergence: a Case Study on Colour},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1646--1656},
  abstract  = {Human languages have multiple strategies that allow us to discriminate objects
	in a vast variety of contexts. Colours have been extensively studied from this
	point of view. In particular, previous research in artificial language
	evolution has shown how artificial languages may emerge based on specific
	strategies to distinguish colours. Still, it has not been shown how several
	strategies of diverse complexity can be autonomously managed by artificial
	agents. We propose an intrinsic motivation system that allows agents in a
	population to create a shared artificial language and progressively increase
	its expressive power. Our results show that with such a system agents
	successfully regulate their language development, which indicates a relation
	between population size and consistency in the emergent communicative systems.},
  url       = {http://aclweb.org/anthology/C16-1155}
}

@InProceedings{hayashi:2016:COLING,
  author    = {Hayashi, Yoshihiko},
  title     = {Predicting the Evocation Relation between Lexicalized Concepts},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1657--1668},
  abstract  = {Evocation is a directed yet weighted semantic relationship between lexicalized
	concepts. Although evocation relations are considered potentially useful in
	several semantic NLP tasks, the prediction of the evocation relation between an
	arbitrary pair of concepts remains difficult, since evocation relationships
	cover a broader range of semantic relations rooted in human perception and
	experience. This paper presents a supervised learning approach to predict the
	strength (by regression) and to determine the directionality (by
	classification) of the evocation relation that might hold between a pair of
	lexicalized concepts. Empirical results that were obtained by investigating
	useful features are shown, indicating that a combination of the proposed
	features largely outperformed individual baselines, and also suggesting that
	semantic relational vectors computed from existing semantic vectors for
	lexicalized concepts were indeed effective for both the prediction of strength
	and the determination of directionality.},
  url       = {http://aclweb.org/anthology/C16-1156}
}

@InProceedings{paetzold-specia:2016:COLING2,
  author    = {Paetzold, Gustavo  and  Specia, Lucia},
  title     = {Collecting and Exploring Everyday Language for Predicting Psycholinguistic Properties of Words},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1669--1679},
  abstract  = {Exploring language usage through frequency analysis in large corpora is a
	defining feature in most recent work in corpus and computational linguistics.
	From a psycholinguistic perspective, however, the corpora used in these
	contributions are often not representative of language usage: they are either
	domain-specific, limited in size, or extracted from unreliable sources. In an
	effort to address this limitation, we introduce SubIMDB, a corpus of everyday
	spoken language that we created, containing over 225 million words. The
	corpus was extracted from 38,102 subtitles of family, comedy and children's
	movies and series, and is the first sizeable structured corpus of subtitles
	made available. Our experiments show that word frequency norms extracted from
	this corpus are more effective than those from well-known norms such as
	Kucera-Francis, HAL and SUBTLEXus in predicting various psycholinguistic
	properties of words, such as lexical decision times, familiarity, age of
	acquisition and simplicity. We also provide evidence that contradicts the
	long-standing assumption that the ideal size for a corpus can be determined
	solely based on how well its word frequencies correlate with lexical decision
	times.},
  url       = {http://aclweb.org/anthology/C16-1157}
}

@InProceedings{wachsmuth-alkhatib-stein:2016:COLING,
  author    = {Wachsmuth, Henning  and  Al Khatib, Khalid  and  Stein, Benno},
  title     = {Using Argument Mining to Assess the Argumentation Quality of Essays},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1680--1691},
  abstract  = {Argument mining aims to determine the argumentative structure of texts.
	Although it is said to be crucial for future applications such as writing
	support systems, the benefit of its output has rarely been evaluated. This
	paper puts the analysis of the output into focus. In particular, we
	investigate to what extent the mined structure can be leveraged to assess the
	argumentation quality of persuasive essays. We find insightful statistical
	patterns in the structure of essays. From these, we derive novel features that
	we evaluate in four argumentation-related essay scoring tasks. Our results
	reveal the benefit of argument mining for assessing argumentation quality.
	Among others, we improve the state of the art in scoring an essay's
	organization and its argument strength.},
  url       = {http://aclweb.org/anthology/C16-1158}
}

@InProceedings{wang-andersen:2016:COLING,
  author    = {Wang, Shuhan  and  Andersen, Erik},
  title     = {Grammatical Templates: Improving Text Difficulty Evaluation for Language Learners},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1692--1702},
  abstract  = {Language students are most engaged while reading texts at an appropriate
	difficulty level. However, existing methods of evaluating text difficulty focus
	mainly on vocabulary and do not prioritize grammatical features; hence, they do
	not work well for language learners with limited knowledge of grammar. In this
	paper, we introduce grammatical templates, the expert-identified units of
	grammar that students learn from class, as an important feature of text
	difficulty evaluation. Experimental classification results show that
	grammatical template features significantly improve text difficulty prediction
	accuracy over baseline readability features by 7.4%. Moreover, we build a simple
	and human-understandable text difficulty evaluation approach with 87.7%
	accuracy, using only 5 grammatical template features.},
  url       = {http://aclweb.org/anthology/C16-1159}
}

@InProceedings{schnober-EtAl:2016:COLING,
  author    = {Schnober, Carsten  and  Eger, Steffen  and  Do Dinh, Erik-L\^{a}n  and  Gurevych, Iryna},
  title     = {Still not there? Comparing Traditional Sequence-to-Sequence Models to Encoder-Decoder Neural Networks on Monotone String Translation Tasks},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1703--1714},
  abstract  = {We analyze the performance of encoder-decoder neural models and compare them
	with well-known established methods. The latter represent different classes of
	traditional approaches that are applied to the monotone sequence-to-sequence
	tasks OCR post-correction, spelling correction, grapheme-to-phoneme conversion,
	and lemmatization.
	Such tasks are of practical relevance for various higher-level research fields
	including digital humanities, automatic text correction, and speech
	recognition. 
	We investigate how well generic deep-learning approaches adapt to these tasks,
	and how they perform in comparison with established and more specialized
	methods, including our own adaptation of pruned CRFs.},
  url       = {http://aclweb.org/anthology/C16-1160}
}

@InProceedings{jiang-EtAl:2016:COLING2,
  author    = {Jiang, Tingsong  and  Liu, Tianyu  and  Ge, Tao  and  Sha, Lei  and  Chang, Baobao  and  Li, Sujian  and  Sui, Zhifang},
  title     = {Towards Time-Aware Knowledge Graph Completion},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1715--1724},
  abstract  = {Knowledge graph (KG) completion adds new facts to a KG by making inferences
	from existing facts. Most existing methods ignore the time information and only
	learn from time-unknown fact triples. In dynamic environments that evolve over
	time, it is important and challenging for knowledge graph completion models to
	take into account the temporal aspects of facts. In this paper, we present a
	novel time-aware knowledge graph completion model that is able to predict links
	in a KG using both the existing facts and the temporal information of the
	facts. To incorporate the happening time of facts, we propose a time-aware KG
	embedding model using temporal order information among facts. To incorporate
	the valid time of facts, we propose a joint time-aware inference model based on
	Integer Linear Programming (ILP) using temporal consistency information as
	constraints. We further integrate the two models to make full use of global
	temporal information. We empirically evaluate our models on the time-aware KG
	completion task. Experimental results show that our time-aware models
	consistently achieve state-of-the-art results on temporal facts.},
  url       = {http://aclweb.org/anthology/C16-1161}
}

@InProceedings{dadashkarimi-jalilisabet-shakery:2016:COLING,
  author    = {Dadashkarimi, Javid  and  Jalili Sabet, Masoud  and  Shakery, Azadeh},
  title     = {Learning to Weight Translations using Ordinal Linear Regression and Query-generated Training Data for Ad-hoc Retrieval with Long Queries},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1725--1733},
  abstract  = {Ordinal regression, also known as learning to rank, has long been used in
	information retrieval (IR). Learning-to-rank algorithms have been successfully
	applied to document ranking, information filtering, and building large aligned
	corpora.
	In this paper, we propose to use this algorithm for query modeling in
	cross-language environments.
	To this end, first we build a query-generated training data using
	pseudo-relevant documents to the query and all translation candidates.
	The pseudo-relevant documents are the top-ranked documents retrieved in response
	to a translation of the original query.
	The class of each candidate in the training data is determined based on
	presence/absence of the candidate in the pseudo-relevant documents.
	We learn an ordinal regression model to score the candidates based on their
	relevance to the context of the query, and after that, we construct a
	query-dependent translation model using a softmax function. Finally, we
	re-weight the query based on the obtained model. 
	Experimental results on French, German, Spanish, and Italian CLEF collections
	demonstrate that the proposed method achieves better results compared to
	state-of-the-art cross-language information retrieval methods, particularly in
	long queries with large training data.},
  url       = {http://aclweb.org/anthology/C16-1162}
}

@InProceedings{romeo-EtAl:2016:COLING,
  author    = {Romeo, Salvatore  and  Da San Martino, Giovanni  and  Barr\'{o}n-Cede\~{n}o, Alberto  and  Moschitti, Alessandro  and  Belinkov, Yonatan  and  Hsu, Wei-Ning  and  Zhang, Yu  and  Mohtarami, Mitra  and  Glass, James},
  title     = {Neural Attention for Learning to Rank Questions in Community Question Answering},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1734--1745},
  abstract  = {In real-world data, e.g., from Web forums, text is often contaminated with
	redundant or irrelevant content, which introduces noise into machine
	learning algorithms.
	In this paper, we apply Long Short-Term Memory networks with an attention
	mechanism, which can select important parts of text for the task of similar
	question retrieval from community Question Answering (cQA) forums. 
	In particular, we use the attention weights for both selecting entire sentences
	and their subparts, i.e., word/chunk, from shallow syntactic trees. More
	interestingly, we apply tree kernels to the filtered text representations, thus
	exploiting the implicit features of the subtree space for learning question
	reranking. Our results show that the attention-based pruning allows for
	achieving the top position in the cQA challenge of SemEval 2016, with a
	relatively large gap from the other participants while greatly decreasing
	running time.},
  url       = {http://aclweb.org/anthology/C16-1163}
}

@InProceedings{yin-EtAl:2016:COLING,
  author    = {Yin, Wenpeng  and  Yu, Mo  and  Xiang, Bing  and  Zhou, Bowen  and  Sch\"{u}tze, Hinrich},
  title     = {Simple Question Answering by Attentive Convolutional Neural Network},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1746--1756},
  abstract  = {This work focuses on answering single-relation factoid questions over Freebase.
	Each question can acquire the answer from a single fact of form (subject,
	predicate, object) in Freebase.  This task, simple question answering
	(SimpleQA), can be addressed via a two-step pipeline: entity linking and fact
	selection. In fact selection, we match the subject entity in a fact candidate
	with the entity mention in the question by a character-level convolutional
	neural network (char-CNN), and match the predicate in that fact with the
	question by a word-level CNN (word-CNN). This work makes two main
	contributions. (i) A simple and effective entity linker over Freebase is
	proposed. Our entity linker outperforms the
	state-of-the-art entity linker over SimpleQA task. (ii) A novel attentive
	maxpooling is stacked over word-CNN, so that the predicate representation can
	be matched with the predicate-focused question representation more effectively.
	Experiments show that our system sets new state-of-the-art in this task.},
  url       = {http://aclweb.org/anthology/C16-1164}
}

@InProceedings{semeniuta-severyn-barth:2016:COLING,
  author    = {Semeniuta, Stanislau  and  Severyn, Aliaksei  and  Barth, Erhardt},
  title     = {Recurrent Dropout without Memory Loss},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1757--1766},
  abstract  = {This paper presents a novel approach to recurrent neural network (RNN)
	regularization. Differently from the widely adopted dropout method, which is
	applied to forward connections of feedforward architectures or RNNs, we propose
	to drop neurons directly in recurrent connections in a way that does not cause
	loss of long-term memory. Our approach is as easy to implement and apply as the
	regular feed-forward dropout and we demonstrate its effectiveness for the most
	effective modern recurrent network -- Long Short-Term Memory network. Our
	experiments on three NLP benchmarks show consistent improvements even when
	combined with conventional feed-forward dropout.},
  url       = {http://aclweb.org/anthology/C16-1165}
}

@InProceedings{balikas-EtAl:2016:COLING,
  author    = {Balikas, Georgios  and  Amoualian, Hesam  and  Clausel, Marianne  and  Gaussier, Eric  and  Amini, Massih R},
  title     = {Modeling topic dependencies in semantically coherent text spans with copulas},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1767--1776},
  abstract  = {The exchangeability assumption in topic models like Latent Dirichlet Allocation
	(LDA) often results in inferring inconsistent topics for the words of text
	spans like noun-phrases, which are usually expected to be topically coherent.
	We propose copulaLDA, which extends LDA by integrating part of the text
	structure to the model and relaxes the conditional independence assumption
	between the word-specific latent topics given the per-document topic
	distributions. To this end, we assume that the words of text spans like
	noun-phrases are topically bound and we model this dependence with copulas. 
	We demonstrate empirically the effectiveness of copulaLDA on both intrinsic and
	extrinsic evaluation tasks on several publicly available corpora.},
  url       = {http://aclweb.org/anthology/C16-1166}
}

@InProceedings{cui-EtAl:2016:COLING,
  author    = {Cui, Yiming  and  Liu, Ting  and  Chen, Zhipeng  and  Wang, Shijin  and  Hu, Guoping},
  title     = {Consensus Attention-based Neural Networks for Chinese Reading Comprehension},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1777--1786},
  abstract  = {Reading comprehension has seen a boom in recent NLP research.
	Several institutes have released Cloze-style reading comprehension data,
	which has greatly accelerated research on machine comprehension.
	In this work, we first present Chinese reading comprehension datasets, which
	consist of a People Daily news dataset and a Children's Fairy Tale (CFT) dataset.
	We also propose a consensus attention-based neural network architecture to
	tackle the Cloze-style reading comprehension problem, which aims to induce a
	consensus attention over every word in the query.
	Experimental results show that the proposed neural network significantly
	outperforms state-of-the-art baselines on several public datasets.
	Furthermore, we set up a baseline for the Chinese reading comprehension task,
	which we hope will speed up future research.},
  url       = {http://aclweb.org/anthology/C16-1167}
}

@InProceedings{felt-ringger-seppi:2016:COLING,
  author    = {Felt, Paul  and  Ringger, Eric  and  Seppi, Kevin},
  title     = {Semantic Annotation Aggregation with Conditional Crowdsourcing Models and Word Embeddings},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1787--1796},
  abstract  = {In modern text annotation projects, crowdsourced annotations are 
	often aggregated using item response models 
	or by majority vote. 
	Recently, item response models enhanced with generative 
	data models have been shown to yield substantial benefits 
	over those with conditional or no data models. 
	However, suitable generative data models do not exist for 
	many tasks, such as semantic labeling tasks. 
	When no generative data model exists, 
	we demonstrate that similar benefits may 
	be derived by conditionally modeling documents that have 
	been previously embedded in a semantic space using recent work in 
	vector space models. 
	We use this approach to show state-of-the-art results on a variety of 
	semantic annotation aggregation tasks.},
  url       = {http://aclweb.org/anthology/C16-1168}
}

@InProceedings{ye-zhang-cai:2016:COLING,
  author    = {Ye, Na  and  Zhang, Guiping  and  Cai, Dongfeng},
  title     = {Interactive-Predictive Machine Translation based on Syntactic Constraints of Prefix},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1797--1806},
  abstract  = {Interactive-predictive machine translation (IPMT) is a translation mode which
	combines machine translation technology and human behaviours. In the IPMT
	system, the utilization of the prefix greatly affects the interaction
	efficiency. However, state-of-the-art methods filter translation hypotheses
	mainly according to their matching results with the prefix at the character level,
	and the advantage of the prefix is not fully exploited. Focusing on this
	problem, this paper mines the deep constraints of the prefix at the syntactic level to
	improve the performance of IPMT systems. Two syntactic subtree matching rules
	based on phrase structure grammar are proposed to filter the translation
	hypotheses more strictly. Experimental results on LDC Chinese-English corpora
	show that the proposed method outperforms state-of-the-art phrase-based IPMT
	system while keeping comparable decoding speed.},
  url       = {http://aclweb.org/anthology/C16-1169}
}

@InProceedings{zhang-EtAl:2016:COLING3,
  author    = {Zhang, Jian  and  Li, Liangyou  and  Way, Andy  and  Liu, Qun},
  title     = {Topic-Informed Neural Machine Translation},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1807--1817},
  abstract  = {In recent years, neural machine translation (NMT) has demonstrated
	state-of-the-art machine translation (MT) performance. It is a new approach to
	MT, which tries to learn a set of parameters to maximize the conditional
	probability of target sentences given source sentences. In this paper, we
	present a novel approach to improve the translation performance in NMT by
	conveying topic knowledge during translation. The proposed topic-informed NMT
	can increase the likelihood of
	selecting words from the same topic and domain for translation. Experimentally,
	we demonstrate that topic-informed NMT can achieve a 1.15 (3.3% relative) and
	1.67 (5.4% relative) absolute improvement in BLEU score on the
	Chinese-to-English language pair using NIST 2004 and 2005 test sets,
	respectively, compared to NMT without topic information.},
  url       = {http://aclweb.org/anthology/C16-1170}
}

@InProceedings{cao-EtAl:2016:COLING2,
  author    = {Cao, Hailong  and  Zhao, Tiejun  and  ZHANG, Shu  and  Meng, Yao},
  title     = {A Distribution-based Model to Learn Bilingual Word Embeddings},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1818--1827},
  abstract  = {We introduce a distribution-based model to learn bilingual word embeddings from
	monolingual data. It is simple, effective and does not require any parallel
	data or any seed lexicon. We take advantage of the fact that word embeddings
	are usually in the form of dense, real-valued, low-dimensional vectors, and therefore
	their distribution can be accurately estimated. A novel cross-lingual
	learning objective is proposed which directly matches the distributions of word
	embeddings in one language with that in the other language. During the joint
	learning process, we dynamically estimate the distributions of word embeddings
	in two languages respectively and minimize the dissimilarity between them
	through the standard back-propagation algorithm. Our learned bilingual word
	embeddings allow grouping each word and its translations together in the shared
	vector space. We demonstrate the utility of the learned embeddings on the task
	of finding word-to-word translations from monolingual corpora. Our model
	achieved encouraging performance on data in both related languages and
	substantially different languages.},
  url       = {http://aclweb.org/anthology/C16-1171}
}

@InProceedings{niehues-EtAl:2016:COLING,
  author    = {Niehues, Jan  and  Cho, Eunah  and  Ha, Thanh-Le  and  Waibel, Alex},
  title     = {Pre-Translation for Neural Machine Translation},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1828--1836},
  abstract  = {Recently, the development of neural machine translation (NMT) has significantly
	improved the translation quality of automatic machine translation. While most
	sentences are more accurate and fluent than translations by statistical machine
	translation (SMT)-based systems, in some cases, the NMT system produces
	translations that have a completely different meaning. This is especially the
	case when rare words occur.
	When using statistical machine translation, it has already been shown that
	significant gains can be achieved by simplifying the input in a preprocessing
	step. A commonly used example is the pre-reordering approach.
	In this work, we used phrase-based machine translation to pre-translate the
	input into the target language. Then a neural machine translation system
	generates the final hypothesis using the pre-translation. Thereby, we use
	either only the output of the phrase-based machine translation (PBMT) system or
	a combination of the PBMT output and the source sentence. 
	We evaluate the technique on the English to German translation task.
	Using this approach we are able to outperform the PBMT system as well as the
	baseline neural MT system by up to 2 BLEU points. We analyzed the influence of
	the quality of the initial system on the final result.},
  url       = {http://aclweb.org/anthology/C16-1172}
}

@InProceedings{claveau-kijak:2016:COLING,
  author    = {Claveau, Vincent  and  Kijak, Ewa},
  title     = {Direct vs. indirect evaluation of distributional thesauri},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1837--1848},
  abstract  = {With the success of word embedding methods in various Natural Language
	Processing tasks, the whole field of distributional semantics has experienced
	renewed interest. Besides the famous word2vec, recent studies have presented
	efficient techniques to build distributional thesauri; in particular, Claveau
	et al. (2014) have already shown that Information Retrieval (IR) tools and
	concepts can be successfully used to build a thesaurus. In this paper, we
	address the problem of the evaluation of such thesauri or embedding models and
	compare their results. Through several experiments and by directly evaluating
	the results against reference lexicons, we show that the recent IR-based
	distributional models outperform state-of-the-art systems such as word2vec.
	Following the work of Claveau and Kijak (2016), we use IR as an applicative
	framework to indirectly evaluate the generated thesaurus. Here again, this
	task-based evaluation validates the IR approach used to build the thesaurus.
	Moreover, it allows us to compare these results with those from the direct
	evaluation framework used in the literature. The observed differences bring
	these evaluation habits into question.},
  url       = {http://aclweb.org/anthology/C16-1173}
}

@InProceedings{jameel-schockaert:2016:COLING,
  author    = {Jameel, Shoaib  and  Schockaert, Steven},
  title     = {D-GloVe: A Feasible Least Squares Model for Estimating Word Embedding Densities},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1849--1860},
  abstract  = {We propose a new word embedding model, inspired by GloVe, which is formulated
	as a feasible least squares optimization problem. In contrast to existing
	models, we explicitly represent the uncertainty about the exact definition of
	each word vector. To this end, we estimate the error that results from using
	noisy co-occurrence counts in the formulation of the model, and we model the
	imprecision that results from including uninformative context words. Our
	experimental results demonstrate that this model compares favourably with
	existing word embedding models.},
  url       = {http://aclweb.org/anthology/C16-1174}
}

@InProceedings{dedeyne-perfors-navarro:2016:COLING,
  author    = {De Deyne, Simon  and  Perfors, Amy  and  Navarro, Daniel J},
  title     = {Predicting human similarity judgments with distributional models: The value of word associations.},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1861--1870},
  abstract  = {Most distributional lexico-semantic models derive their representations based
	on external language resources such as text corpora. In this study, we propose
	that internal language models, which are more closely aligned to the mental
	representations of words, could provide important insights into cognitive
	science, including linguistics. Doing so allows us to reflect upon theoretical
	questions regarding the structure of the mental lexicon, and also puts into
	perspective a number of assumptions underlying recently proposed distributional
	text-based models.
	In particular, we focus on word-embedding models which have been proposed to
	learn aspects of word meaning in a manner similar to humans. These are
	contrasted with internal language models derived from a new extensive data set
	of word associations. 
	Using relatedness and similarity judgments we evaluate these models and find
	that the word-association-based internal language models consistently
	outperform current state-of-the art text-based external language models, often
	with a large margin. These results are not just a performance improvement; they
	also have implications for our understanding of how distributional knowledge is
	used by people.},
  url       = {http://aclweb.org/anthology/C16-1175}
}

@InProceedings{yamane-EtAl:2016:COLING,
  author    = {Yamane, Josuke  and  Takatani, Tomoya  and  Yamada, Hitoshi  and  Miwa, Makoto  and  Sasaki, Yutaka},
  title     = {Distributional Hypernym Generation by Jointly Learning Clusters and Projections},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1871--1879},
  abstract  = {We propose a novel word embedding-based hypernym generation model that jointly
	learns clusters of hyponym-hypernym relations, i.e., hypernymy, and projections
	from hyponym to hypernym embeddings. Most of the recent hypernym detection
	models focus on a hypernymy classification problem that determines whether a
	pair of words is in hypernymy or not. These models do not directly deal with a
	hypernym generation problem in that a model generates hypernyms for a given
	word. Differently from previous studies, our model jointly learns the clusters
	and projections while adjusting the number of clusters, so that the number of
	clusters can be determined depending on the learned projections and vice versa.
	Our model also boosts the performance by incorporating inner product-based
	similarity measures and negative examples, i.e., sampled non-hypernyms, into
	our objectives in learning. We evaluated our joint learning models on the task
	of Japanese and English hypernym generation and showed a significant
	improvement over an existing pipeline model. Our model also compared favorably
	to existing distributed hypernym detection models on the English hypernym
	classification task.},
  url       = {http://aclweb.org/anthology/C16-1176}
}

@InProceedings{hou:2016:COLING,
  author    = {Hou, Yufang},
  title     = {Incremental Fine-grained Information Status Classification Using Attention-based LSTMs},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1880--1890},
  abstract  = {Information status plays an important role in discourse processing. According
	to the hearer’s common sense knowledge and his comprehension of the preceding
	text, a discourse entity could be old, mediated or new. In this paper, we
	propose an attention-based LSTM model to address the problem of fine-grained
	information status classification in an incremental manner. Our approach
	resembles how human beings process the task, i.e., decide the information
	status of the current discourse entity based on its preceding context.
	Experimental results on the ISNotes corpus (Markert et al., 2012) reveal that
	(1) despite its moderate result, our model with only word embedding features
	captures, to a large extent, the semantic knowledge needed for the task;
	and (2) when incorporating several additional simple features, our
	model achieves competitive results compared to the state-of-the-art
	approach (Hou et al., 2013), which depends heavily on many hand-crafted
	semantic features.},
  url       = {http://aclweb.org/anthology/C16-1177}
}

@InProceedings{shih-chen:2016:COLING,
  author    = {Shih, Yong-Siang  and  Chen, Hsin-Hsi},
  title     = {Detection, Disambiguation and Argument Identification of Discourse Connectives in Chinese Discourse Parsing},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1891--1902},
  abstract  = {In this paper, we investigate four important issues together for explicit
	discourse relation labelling in Chinese texts: (1) discourse connective
	extraction, (2) linking ambiguity resolution, (3) relation type disambiguation,
	and (4) argument boundary identification. In a pipelined Chinese discourse
	parser, we identify potential connective candidates by string matching,
	eliminate non-discourse usages from them with a binary classifier, resolve
	linking ambiguities among connective components by ranking, disambiguate
	relation types by a multiway classifier, and determine the argument boundaries
	by conditional random fields. The experiments on Chinese Discourse Treebank
	show that the F1 scores of 0.7506, 0.7693, 0.7458, and 0.3134 are achieved for
	discourse usage disambiguation, linking disambiguation, relation type
	disambiguation, and argument boundary identification, respectively, in a
	pipelined Chinese discourse parser.},
  url       = {http://aclweb.org/anthology/C16-1178}
}

@InProceedings{braud-plank-sogaard:2016:COLING,
  author    = {Braud, Chlo\'{e}  and  Plank, Barbara  and  S{\o}gaard, Anders},
  title     = {Multi-view and multi-task training of RST discourse parsers},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1903--1913},
  abstract  = {We experiment with different ways of training LSTM networks to predict RST
	discourse trees. The main challenge for RST discourse parsing is the limited
	amount of training data. We combat this by regularizing our models using task
	supervision from related tasks as well as alternative views on discourse
	structures. We show that a simple LSTM sequential discourse parser takes
	advantage of this multi-view and multi-task framework with 12-15% error
	reductions over our baseline (depending on the metric) and results that rival
	more complex state-of-the-art parsers.},
  url       = {http://aclweb.org/anthology/C16-1179}
}

@InProceedings{qin-zhang-zhao:2016:COLING,
  author    = {Qin, Lianhui  and  Zhang, Zhisong  and  Zhao, Hai},
  title     = {Implicit Discourse Relation Recognition with Context-aware Character-enhanced Embeddings},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1914--1924},
  abstract  = {For the task of implicit discourse relation recognition, traditional models
	utilizing manual features can suffer from the data sparsity problem. Neural models
	provide a solution with distributed representations, which could encode the
	latent semantic information, and are suitable for recognizing semantic
	relations between argument pairs. However, conventional vector representations
	usually adopt embeddings at the word level and cannot handle the rare word
	problem well without carefully considering morphological information at the character
	level. Moreover, embeddings are assigned to individual words independently,
	which lacks the crucial contextual information. This paper proposes a neural
	model utilizing context-aware character-enhanced embeddings to alleviate the
	drawbacks of the current word level representation. Our experiments show that
	the enhanced embeddings work well and the proposed model obtains
	state-of-the-art results.},
  url       = {http://aclweb.org/anthology/C16-1180}
}

@InProceedings{pluss-piwek:2016:COLING,
  author    = {Pl\"{u}ss, Brian  and  Piwek, Paul},
  title     = {Measuring Non-cooperation in Dialogue},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1925--1936},
  abstract  = {This paper introduces a novel method for measuring non-cooperation in dialogue.
	The key idea is that linguistic non-cooperation can be measured in terms of the
	extent to which dialogue participants deviate from conventions regarding the
	proper introduction and discharging of conversational obligations (e.g., the
	obligation to respond to a question). Previous work on non-cooperation has
	focused mainly on non-linguistic task-related non-cooperation or modelled
	non-cooperation in terms of special rules describing non-cooperative
	behaviours. In contrast, we start from rules for normal/correct dialogue
	behaviour - i.e., a dialogue game - which in principle can be derived from a
	corpus of cooperative dialogues, and provide a quantitative measure for the
	degree to which participants comply with these rules. We evaluated the model on
	a corpus of political interviews, with encouraging results. The model predicts
	accurately the degree of cooperation for one of the two dialogue game roles
	(interviewer) and also the relative cooperation for both roles (i.e., which
	interlocutor in the conversation was most cooperative). Being able to measure
	cooperation has applications in many areas, from the analysis - manual, semi-, and
	fully automatic - of natural language interactions to human-like virtual
	personal assistants, tutoring agents, sophisticated dialogue systems, and
	role-playing virtual humans.},
  url       = {http://aclweb.org/anthology/C16-1181}
}

@InProceedings{derczynski:2016:COLING,
  author    = {Derczynski, Leon},
  title     = {Representation and Learning of Temporal Relations},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1937--1948},
  abstract  = {Determining the relative order of events and times described in text is an
	important problem in natural language processing. It is also a difficult one:
	general state-of-the-art performance has been stuck at a relatively low ceiling
	for years. We investigate the representation of temporal relations, and
	empirically evaluate the effect that various temporal relation representations
	have on machine learning performance. While machine learning performance
	decreases with increased representational expressiveness, not all
	representation simplifications have equal impact.},
  url       = {http://aclweb.org/anthology/C16-1182}
}

@InProceedings{upadhyay-EtAl:2016:COLING,
  author    = {Upadhyay, Shyam  and  Gupta, Nitish  and  Christodoulopoulos, Christos  and  Roth, Dan},
  title     = {Revisiting the Evaluation for Cross Document Event Coreference},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1949--1958},
  abstract  = {Cross document event coreference (CDEC) is an important task that aims at
	aggregating event-related information across multiple documents. We revisit
	the evaluation for CDEC, and discover that past works have adopted different,
	often inconsistent, evaluation settings, which either overlook certain mistakes
	in coreference decisions, or make assumptions that simplify the coreference
	task considerably. We suggest a new evaluation methodology which overcomes
	these limitations, and allows for an accurate assessment of CDEC systems. Our
	new evaluation setting better reflects the corpus-wide information aggregation
	ability of CDEC systems by separating event-coreference decisions made across
	documents from those made within a document. In addition, we suggest a better
	baseline for the task and semi-automatically identify several inconsistent
	annotations in the evaluation dataset.},
  url       = {http://aclweb.org/anthology/C16-1183}
}

@InProceedings{watanabe-EtAl:2016:COLING,
  author    = {Watanabe, Kento  and  Matsubayashi, Yuichiroh  and  Orita, Naho  and  Okazaki, Naoaki  and  Inui, Kentaro  and  Fukayama, Satoru  and  Nakano, Tomoyasu  and  Smith, Jordan  and  Goto, Masataka},
  title     = {Modeling Discourse Segments in Lyrics Using Repeated Patterns},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1959--1969},
  abstract  = {This study proposes a computational model of the discourse segments in lyrics
	to understand and to model the structure of lyrics. To test our hypothesis that
	discourse segmentations in lyrics strongly correlate with repeated patterns, we
	conduct the first large-scale corpus study on discourse segments in lyrics.
	Next, we propose the task of automatically identifying segment boundaries in
	lyrics and train a logistic regression model for the task with the repeated
	pattern and textual features. The results of our empirical experiments
	illustrate the significance of capturing repeated patterns in predicting the
	boundaries of discourse segments in lyrics.},
  url       = {http://aclweb.org/anthology/C16-1184}
}

@InProceedings{li-wu:2016:COLING,
  author    = {Li, Wei  and  Wu, Yunfang},
  title     = {Multi-level Gated Recurrent Neural Network for dialog act classification},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1970--1979},
  abstract  = {In this paper we focus on the problem of dialog act (DA) labelling. This
	problem has recently attracted a lot of attention as it is an important
	sub-part of an automatic question answering system, which is currently in great
	demand. Traditional methods tend to see this problem as a sequence labelling
	task and deal with it by applying classifiers with rich features. Most of the
	current neural network models still omit the sequential information in the
	conversation. Hence, we apply a novel multi-level gated recurrent neural
	network (GRNN) with non-textual information to predict the DA tag. Our model
	not only utilizes textual information, but also makes use of non-textual and
	contextual information.
	In comparison, our model has shown significant improvement over previous work
	on the Switchboard Dialog Act (SWDA) task by over 6%.},
  url       = {http://aclweb.org/anthology/C16-1185}
}

@InProceedings{patra-das-bandyopadhyay:2016:COLING,
  author    = {Patra, Braja Gopal  and  Das, Dipankar  and  Bandyopadhyay, Sivaji},
  title     = {Multimodal Mood Classification - A Case Study of Differences in Hindi and Western Songs},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1980--1989},
  abstract  = {Music information retrieval has emerged as a mainstream research area in the
	past two decades. Experiments on music mood classification have been performed
	mainly on Western music based on audio, lyrics and a combination of both.
	Unfortunately, due to the scarcity of digitalized resources, Indian music fares
	poorly in music mood retrieval research. In this paper, we identified the mood
	taxonomy and prepared multimodal mood annotated datasets for Hindi and Western
	songs. We identified important audio and lyric features using correlation based
	feature selection technique. Finally, we developed mood classification systems
	using Support Vector Machines and Feed Forward Neural Networks based on the
	features collected from audio, lyrics, and a combination of both. The best
	performing multimodal systems achieved F-measures of 75.1 and 83.5 for
	classifying the moods of the Hindi and Western songs respectively using Feed
	Forward Neural Networks. A comparative analysis indicates that the selected
	features work well for mood classification of the Western songs and produces
	better results as compared to the mood classification systems for Hindi songs.},
  url       = {http://aclweb.org/anthology/C16-1186}
}

@InProceedings{li-EtAl:2016:COLING6,
  author    = {Li, Chaozhuo  and  Wu, Yu  and  Wu, Wei  and  Xing, Chen  and  Li, Zhoujun  and  Zhou, Ming},
  title     = {Detecting Context Dependent Messages in a Conversational Environment},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1990--1999},
  abstract  = {While automatic response generation for building chatbot systems has drawn a
	lot of attention recently, there is limited understanding on when we need to
	consider the linguistic context of an input text in the generation process. The
	task is challenging, as messages in a conversational environment are short and
	informal, and evidence that can indicate a message is context dependent is
	scarce.
	After a study of social conversation data crawled from the web, we observed
	that some characteristics estimated from the responses of messages are
	discriminative for identifying context dependent messages.
	With the characteristics as weak supervision, we propose using a Long Short
	Term Memory (LSTM) network to learn a classifier. Our method carries out text
	representation and classifier learning in a unified framework.
	Experimental results show that the proposed method can significantly outperform baseline
	methods on accuracy of classification.},
  url       = {http://aclweb.org/anthology/C16-1187}
}

@InProceedings{venugopal-rus:2016:COLING,
  author    = {Venugopal, Deepak  and  Rus, Vasile},
  title     = {Joint Inference for Mode Identification in Tutorial Dialogues},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2000--2011},
  abstract  = {Identifying dialogue acts and dialogue modes during tutorial interactions is an
	extremely crucial sub-step in understanding patterns of effective tutor-tutee
	interactions. In this work, we develop a novel joint inference method that
	labels each utterance in a tutoring dialogue session with a dialogue act and a
	specific mode from a set of pre-defined dialogue acts and modes, respectively.
	Specifically, we develop our joint model using Markov Logic Networks (MLNs), a
	framework that combines first-order logic with probabilities, and is thus
	capable of representing complex, uncertain knowledge. We define first-order
	formulas in our MLN that encode the inter-dependencies between dialogue modes
	and more fine-grained dialogue actions. We then use a joint inference to
	jointly label the modes as well as the dialogue acts in an utterance. We
	compare our system against a pipeline system based on SVMs on a real-world
	dataset with tutoring sessions of over 500 students. Our results show that the
	joint inference system is far more effective than the pipeline system in mode
	detection, and improves over the performance of the pipeline system by about 6
	points in F1 score. The joint inference system also performs much better than
	the pipeline system in the context of labeling modes that highlight important
	pedagogical steps in tutoring.},
  url       = {http://aclweb.org/anthology/C16-1188}
}

@InProceedings{khanpour-guntakandla-nielsen:2016:COLING,
  author    = {Khanpour, Hamed  and  Guntakandla, Nishitha  and  Nielsen, Rodney},
  title     = {Dialogue Act Classification in Domain-Independent Conversations Using a Deep Recurrent Neural Network},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2012--2021},
  abstract  = {In this study, we applied a deep LSTM structure to classify dialogue acts (DAs)
	in open-domain conversations. We found that the word embedding parameters,
	dropout regularization, decay rate, and number of layers are the parameters that
	have the largest effect on the final system accuracy. Using the findings of
	these experiments, we trained a deep LSTM network that outperforms the
	state-of-the-art on the Switchboard corpus by 3.11\%, and MRDA by 2.2\%.},
  url       = {http://aclweb.org/anthology/C16-1189}
}

@InProceedings{kumar-joshi:2016:COLING,
  author    = {Kumar, Vineet  and  Joshi, Sachindra},
  title     = {Non-sentential Question Resolution using Sequence to Sequence Learning},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2022--2031},
  abstract  = {An interactive Question Answering (QA) system frequently encounters
	non-sentential (incomplete) questions. These non-sentential questions may not
	make sense to the system when a user asks them without the context of
	conversation. The system thus needs to take into account the conversation
	context to process the question. In this work, we present a recurrent neural
	network (RNN) based encoder decoder network that can generate a complete
	(intended) question, given an incomplete question and the conversation context.
	RNN encoder decoder networks have been shown to work well when trained on a
	parallel corpus with millions of sentences; however, it is extremely hard to
	obtain conversation data of this magnitude. We therefore propose to decompose
	the original problem into two separate simplified problems where each problem
	focuses on an abstraction. Specifically, we train a semantic sequence model to
	learn semantic patterns, and a syntactic sequence model to learn linguistic
	patterns. We further combine syntactic and semantic sequence models to generate
	an ensemble model. Our model achieves a BLEU score of 30.15 as compared to
	18.54 using a standard RNN encoder decoder model.},
  url       = {http://aclweb.org/anthology/C16-1190}
}

@InProceedings{zhou-huang-zhu:2016:COLING,
  author    = {Zhou, Hao  and  Huang, Minlie  and  Zhu, Xiaoyan},
  title     = {Context-aware Natural Language Generation for Spoken Dialogue Systems},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2032--2041},
  abstract  = {Natural language generation (NLG) is an important component of question
	answering (QA) systems which has a significant impact on system quality. Most
	traditional QA systems based on templates or rules tend to generate rigid and
	stylised responses without the natural variation of human language.
	Furthermore, such methods require considerable manual work to generate the templates or
	rules. To address this problem, we propose a Context-Aware LSTM model for NLG.
	The model is completely driven by data without manually designed templates or
	rules. In addition, the context information, including the question to be
	answered, semantic values to be addressed in the response, and the dialogue act
	type during interaction, is incorporated into the neural network model, which
	enables the model to produce varied and informative responses. The
	quantitative evaluation and human evaluation show that CA-LSTM obtains
	state-of-the-art performance.},
  url       = {http://aclweb.org/anthology/C16-1191}
}

@InProceedings{serriere-EtAl:2016:COLING,
  author    = {Serri\`{e}re, Guillaume  and  Cerisara, Christophe  and  Fohr, Dominique  and  Mella, Odile},
  title     = {Weakly-supervised text-to-speech alignment confidence measure},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2042--2050},
  abstract  = {This work proposes a new confidence measure for evaluating text-to-speech
	alignment systems outputs, which is a key component for many applications, such
	as semi-automatic corpus anonymization, lip syncing, film dubbing, corpus
	preparation for speech synthesis and speech recognition acoustic models
	training. This confidence measure exploits deep neural networks that are
	trained on large corpora without direct supervision. It is evaluated on an
	open-source spontaneous speech corpus and outperforms a confidence score
	derived from a state-of-the-art text-to-speech aligner. We further show that
	this confidence measure can be used to fine-tune the output of this aligner and
	improve the quality of the resulting alignment.},
  url       = {http://aclweb.org/anthology/C16-1192}
}

@InProceedings{kim-stratos-sarikaya:2016:COLING2,
  author    = {Kim, Young-Bum  and  Stratos, Karl  and  Sarikaya, Ruhi},
  title     = {Domainless Adaptation by Constrained Decoding on a Schema Lattice},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2051--2060},
  abstract  = {In many applications such as personal digital assistants,
	there is a constant need for new domains to increase the system's coverage of
	user queries.
	A conventional approach is to learn a separate model every time a new domain is
	introduced.
	This approach is slow, inefficient, and a bottleneck for scaling to a large
	number of domains.
	In this paper, we introduce a framework that allows us to have a single model
	that can handle all domains,
	including unknown domains that may be created in the future, as long as they are
	covered in the master schema.
	The key idea is to remove the need for distinguishing domains by explicitly
	predicting the schema of queries.
	Given the permitted schema of a query, we perform constrained decoding on a lattice
	of slot sequences allowed under
	the schema. The proposed model achieves competitive and often superior
	performance over the
	conventional model trained separately per domain.},
  url       = {http://aclweb.org/anthology/C16-1193}
}

@InProceedings{singh-EtAl:2016:COLING1,
  author    = {Singh, Mittul  and  Greenberg, Clayton  and  Oualil, Youssef  and  Klakow, Dietrich},
  title     = {Sub-Word Similarity based Search for Embeddings: Inducing Rare-Word Embeddings for Word Similarity Tasks and Language Modelling},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2061--2070},
  abstract  = {Training good word embeddings requires large amounts of data. Out-of-vocabulary
	words will still be encountered at test-time, leaving these words without
	embeddings. To overcome this lack of embeddings for rare words, existing
	methods leverage morphological features to generate embeddings. While the
	existing methods use computationally-intensive rule-based (Soricut and Och,
	2015) or tool-based (Botha and Blunsom, 2014) morphological analysis to
	generate embeddings, our system applies a computationally-simpler sub-word
	search on words that have existing embeddings. Embeddings of the sub-word
	search results are then combined using string similarity functions to generate
	rare word embeddings. We augmented pre-trained word embeddings with these novel
	embeddings and evaluated on a rare word similarity task, obtaining up to 3
	times improvement in correlation over the original set of embeddings. Applying
	our technique to embeddings trained on larger datasets led to on-par
	performance with the existing state-of-the-art for this task. Additionally,
	while analysing augmented embeddings in a log-bilinear language model, we
	observed up to 50\% reduction in rare word perplexity in comparison to other
	more complex language models.},
  url       = {http://aclweb.org/anthology/C16-1194}
}

@InProceedings{meyer-ecklekohler-gurevych:2016:COLING,
  author    = {Meyer, Christian M.  and  Eckle-Kohler, Judith  and  Gurevych, Iryna},
  title     = {Semi-automatic Detection of Cross-lingual Marketing Blunders based on Pragmatic Label Propagation in Wiktionary},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2071--2081},
  abstract  = {We introduce the task of detecting cross-lingual marketing blunders, which
	occur if a trade name resembles an inappropriate or negatively connotated word
	in a target language. To this end, we suggest a formal task definition and a
	semi-automatic method based on the propagation of pragmatic labels from Wiktionary
	across sense-disambiguated translations. Our final tool assists users by
	providing clues for problematic names in any language, which we simulate in two
	experiments on detecting previously occurring marketing blunders and identifying
	relevant clues for established international brands. We conclude the paper with
	a suggested research roadmap for this new task. To initiate further research,
	we publish our online demo along with the source code and data at
	http://uby.ukp.informatik.tu-darmstadt.de/blunder/.},
  url       = {http://aclweb.org/anthology/C16-1195}
}

@InProceedings{milde-EtAl:2016:COLING,
  author    = {Milde, Benjamin  and  Wacker, Jonas  and  Radomski, Stefan  and  M\"{u}hlh\"{a}user, Max  and  Biemann, Chris},
  title     = {Ambient Search: A Document Retrieval System for Speech Streams},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2082--2091},
  abstract  = {We present Ambient Search, an open source system for displaying and retrieving
	relevant documents in real time for speech input. The system works ambiently,
	that is, it unobtrusively listens to speech streams in the background,
	identifies keywords and keyphrases for query construction and continuously
	serves relevant documents from its index. Query terms are ranked with Word2Vec
	and TF-IDF and are continuously updated to allow for ongoing querying of a
	document collection. The retrieved documents, in our case Wikipedia articles,
	are visualized in real time in a browser interface. Our evaluation shows that
	Ambient Search compares favorably to another implicit information retrieval
	system on speech streams. Furthermore, we extrinsically evaluate multiword
	keyphrase generation, showing positive impact for manual transcriptions.},
  url       = {http://aclweb.org/anthology/C16-1196}
}

@InProceedings{li-EtAl:2016:COLING7,
  author    = {Li, Shoushan  and  Dai, Bin  and  Gong, Zhengxian  and  Zhou, Guodong},
  title     = {Semi-supervised Gender Classification with Joint Textual and Social Modeling},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2092--2100},
  abstract  = {In gender classification, labeled data is often limited while unlabeled data is
	ample. This motivates semi-supervised learning for gender classification to
	improve the performance by exploring the knowledge in both labeled and
	unlabeled data. In this paper, we propose a semi-supervised approach to gender
	classification by leveraging textual features and a specific kind of indirect
	links among the users which we call “same-interest” links. Specifically, we
	propose a factor graph, namely Textual and Social Factor Graph (TSFG), to model
	both the textual and the “same-interest” link information. Empirical
	studies demonstrate the effectiveness of the proposed approach to
	semi-supervised gender classification.},
  url       = {http://aclweb.org/anthology/C16-1197}
}

@InProceedings{pilan-volodina-zesch:2016:COLING,
  author    = {Pil\'{a}n, Ildik\'{o}  and  Volodina, Elena  and  Zesch, Torsten},
  title     = {Predicting proficiency levels in learner writings by transferring a linguistic complexity model from expert-written coursebooks},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2101--2111},
  abstract  = {The lack of a sufficient amount of data tailored for a task is a
	well-recognized problem for many statistical NLP methods. In this paper, we
	explore whether data sparsity can be successfully tackled when classifying
	language proficiency levels in the domain of learner-written output texts. We
	aim at overcoming data sparsity by incorporating knowledge in the trained model
	from another domain consisting of input texts written by teaching professionals
	for learners. We compare different domain adaptation techniques and find that a
	weighted combination of the two types of data performs best, which can even
	rival systems based on considerably larger amounts of in-domain data. Moreover,
	we show that normalizing errors in learners' texts can substantially improve
	classification when level-annotated in-domain data is not available.},
  url       = {http://aclweb.org/anthology/C16-1198}
}

@InProceedings{zhang-EtAl:2016:COLING4,
  author    = {Zhang, Dong  and  Li, Shoushan  and  Wang, Hongling  and  Zhou, Guodong},
  title     = {User Classification with Multiple Textual Perspectives},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2112--2121},
  abstract  = {Textual information is of critical importance for automatic user classification
	in social media. However, most previous studies model textual features from a
	single perspective, while the text in a user homepage typically possesses
	different styles of text, such as original messages and comments from others. In
	this paper, we propose a novel approach, namely ensemble LSTM, to user
	classification by incorporating multiple textual perspectives. Specifically,
	our approach first learns an LSTM representation with an LSTM recurrent neural
	network and then presents a joint learning method to integrate all
	naturally-divided textual perspectives. Empirical studies on two basic user
	classification tasks, i.e., gender classification and age classification,
	demonstrate the effectiveness of the proposed approach to user classification
	with multiple textual perspectives.},
  url       = {http://aclweb.org/anthology/C16-1199}
}

@InProceedings{jiang-diesner:2016:COLING,
  author    = {Jiang, Ming  and  Diesner, Jana},
  title     = {Says Who…? Identification of Expert versus Layman Critics’ Reviews of Documentary Films},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2122--2132},
  abstract  = {We extend classic review mining work by building a binary classifier that
	predicts whether a review of a documentary film was written by an expert or a
	layman with 90.70\% accuracy (F1 score), and compare the characteristics of the
	predicted classes. A variety of standard lexical and syntactic features was
	used for this supervised learning task. Our results suggest that experts write
	comparatively lengthier and more detailed reviews that feature more complex
	grammar and a higher diversity in their vocabulary. Layman reviews are more
	subjective and contextualized in peoples’ everyday lives. Our error analysis
	shows that laymen are about twice as likely to be mistaken for experts than vice
	versa. We argue that the type of author might be a useful new feature for
	improving the accuracy of predicting the rating, helpfulness and authenticity
	of reviews. Finally, the outcomes of this work might help researchers and
	practitioners in the field of impact assessment to gain a more fine-grained
	understanding of the perception of different types of media consumers and
	reviewers of a topic, genre or information product.},
  url       = {http://aclweb.org/anthology/C16-1200}
}

@InProceedings{ding-EtAl:2016:COLING,
  author    = {Ding, Xiao  and  Zhang, Yue  and  Liu, Ting  and  Duan, Junwen},
  title     = {Knowledge-Driven Event Embedding for Stock Prediction},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2133--2142},
  abstract  = {Representing structured events as vectors in continuous space offers a new way
	for defining dense features for natural language processing (NLP) applications.
	Prior work has proposed effective methods to learn event representations that
	can capture syntactic and semantic information over text corpus, demonstrating
	their effectiveness for downstream tasks such as event-driven stock prediction.
	On the other hand, events extracted from raw texts do not contain background
	knowledge about the entities and relations they mention. To address this
	issue, this paper proposes to leverage extra information from a knowledge graph,
	which provides ground truth such as attributes and properties of entities and
	encodes valuable relations between entities. Specifically,
	we propose a joint model to combine knowledge graph information into the
	objective function of an event embedding learning model. Experiments on event
	similarity and stock market prediction show that our model is more capable of
	obtaining better event embeddings and making more accurate predictions of stock
	market volatility.},
  url       = {http://aclweb.org/anthology/C16-1201}
}

@InProceedings{chen-EtAl:2016:COLING2,
  author    = {Chen, Wenliang  and  Zhang, Zhenjie  and  Li, Zhenghua  and  Zhang, Min},
  title     = {Distributed Representations for Building Profiles of Users and Items from Text Reviews},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2143--2153},
  abstract  = {In this paper, we propose an approach to learn distributed representations of
	users and items from text comments for recommendation systems. Traditional
	recommendation algorithms, e.g. collaborative filtering and matrix completion,
	are not designed to exploit the key information hidden in the text comments,
	while existing opinion mining methods do not provide direct support to
	recommendation systems with useful features on users and items. Our approach
	attempts to construct vectors to represent profiles of users and items under a
	unified framework to maximize word appearance likelihood. Then, the vector
	representations are used for a recommendation task in which we predict scores
	on unobserved user-item pairs without given texts. The recommendation-aware
	distributed representation approach is fully supported by effective and
	efficient learning algorithms over massive text archives. Our empirical
	evaluations on real datasets show that our system outperforms the
	state-of-the-art baseline systems.},
  url       = {http://aclweb.org/anthology/C16-1202}
}

@InProceedings{tang-EtAl:2016:COLING1,
  author    = {Tang, Haiqing  and  Xiong, Deyi  and  Zhang, Min  and  Gong, Zhengxian},
  title     = {Improving Statistical Machine Translation with Selectional Preferences},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2154--2163},
  abstract  = {Long-distance semantic dependencies are crucial for lexical choice in
	statistical machine translation. In this paper, we study semantic dependencies
	between verbs and their arguments by modeling selectional preferences in the
	context of machine translation. We incorporate preferences that verbs impose on
	subjects and objects into translation. In addition, bilingual selectional
	preferences between source-side verbs and target-side arguments are also
	investigated. Our experiments on Chinese-to-English translation tasks with
	large-scale training data demonstrate that statistical machine translation
	using verbal selectional preferences can achieve statistically significant
	improvements over a state-of-the-art baseline.},
  url       = {http://aclweb.org/anthology/C16-1203}
}

@InProceedings{stanojevic-simaan:2016:COLING,
  author    = {Stanojevi\'{c}, Milo\v{s}  and  Sima'an, Khalil},
  title     = {Hierarchical Permutation Complexity for Word Order Evaluation},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2164--2173},
  abstract  = {Existing approaches for evaluating word order in machine translation work with
	metrics computed directly over a permutation of word positions in system output
	relative to a reference translation. However, every permutation factorizes into
	a permutation tree (PET) built of primal permutations, i.e., atomic units that
	do not factorize any further. In this paper we explore the idea that
	permutations factorizing into (on average) shorter primal permutations should
	represent simpler ordering as well. Consequently, we contribute Permutation
	Complexity, a class of metrics over PETs and their extension to forests, and
	define tight metrics, a sub-class of metrics implementing this idea.
	Subsequently we define example tight metrics and empirically test them
	in word order evaluation. Experiments on the WMT13 data sets for ten language
	pairs show that a tight metric is more often than not better than the
	baselines.},
  url       = {http://aclweb.org/anthology/C16-1204}
}

@InProceedings{meng-EtAl:2016:COLING,
  author    = {Meng, Fandong  and  Lu, Zhengdong  and  Li, Hang  and  Liu, Qun},
  title     = {Interactive Attention for Neural Machine Translation},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2174--2185},
  abstract  = {Conventional attention-based Neural Machine Translation (NMT) conducts dynamic
	alignment in generating the target sentence. By repeatedly reading the
	representation of the source sentence, which remains fixed after being generated by the
	encoder (Bahdanau et al., 2015), the attention mechanism has greatly enhanced
	state-of-the-art NMT. In this paper, we propose a new attention mechanism,
	called INTERACTIVE ATTENTION, which models the interaction between the decoder
	and the representation of source sentence during translation by both reading
	and writing operations. INTERACTIVE ATTENTION can keep track of the interaction
	history and therefore improve the translation performance. Experiments on NIST
	Chinese-English translation task show that INTERACTIVE ATTENTION can achieve
	significant improvements over both the previous attention-based NMT baseline
	and some state-of-the-art variants of attention-based NMT (i.e., coverage
	models (Tu et al., 2016)). A neural machine translator with our INTERACTIVE
	ATTENTION can outperform the open-source attention-based NMT system Groundhog
	by 4.22 BLEU points and the open-source phrase-based system Moses by 3.94 BLEU
	points on average across multiple test sets.},
  url       = {http://aclweb.org/anthology/C16-1205}
}

@InProceedings{pado:2016:COLING,
  author    = {Pado, Ulrike},
  title     = {Get Semantic With Me! The Usefulness of Different Feature Types for Short-Answer Grading},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2186--2195},
  abstract  = {Automated short-answer grading is key to help close the automation
	  loop for large-scale, computerised testing in education. A wide
	  range of features on different levels of linguistic processing has
	  been proposed so far.  We investigate the relative importance of the
	  different types of features across a range of standard corpora (both
	  from a language skill and content assessment context, in English and
	  in German). We find that features on the lexical, text similarity
	  and dependency level often suffice to approximate full-model
	  performance. Features derived from semantic processing particularly
	  benefit the linguistically more varied answers in content assessment
	  corpora.},
  url       = {http://aclweb.org/anthology/C16-1206}
}

@InProceedings{blevins-EtAl:2016:COLING,
  author    = {Blevins, Terra  and  Kwiatkowski, Robert  and  MacBeth, Jamie  and  McKeown, Kathleen  and  Patton, Desmond  and  Rambow, Owen},
  title     = {Automatically Processing Tweets from Gang-Involved Youth: Towards Detecting Loss and Aggression},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2196--2206},
  abstract  = {Violence is a serious problem for cities like Chicago and has been exacerbated
	by the use of social media by gang-involved youths for taunting rival gangs. We
	present a corpus of tweets from a young and powerful female gang member and her
	communicators, which we have annotated with discourse intention, using a deep
	read to understand how and what triggered conversations to escalate into
	aggression. We use this corpus to develop a part-of-speech tagger and phrase
	table for the variant of English that is used and a classifier for identifying
	tweets that express grieving and aggression.},
  url       = {http://aclweb.org/anthology/C16-1207}
}

@InProceedings{chen-EtAl:2016:COLING3,
  author    = {Chen, Chengyao  and  Wang, Zhitao  and  Lei, Yu  and  Li, Wenjie},
  title     = {Content-based Influence Modeling for Opinion Behavior Prediction},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2207--2216},
  abstract  = {Nowadays, social media has become a popular platform for companies to
	understand their customers. It provides valuable opportunities to gain new
	insights into how a person's opinion about a product is influenced by his
	friends. Though various approaches have been proposed to study the opinion
	formation problem, they all formulate opinions as derived sentiment values,
	either discrete or continuous, without considering semantic information. In
	this paper, we propose a Content-based Social Influence Model to study the
	implicit mechanism underlying the change of opinions. We then apply the learned
	model to predict users' future opinions. The advantages of the proposed model
	are the ability to handle semantic information and to learn two influence
	components including the opinion influence of the content information and the
	social relation factors. In the experiments conducted on Twitter datasets, our
	model significantly outperforms other popular opinion formation models.},
  url       = {http://aclweb.org/anthology/C16-1208}
}

@InProceedings{doyle-levy:2016:COLING,
  author    = {Doyle, Gabriel  and  Levy, Roger},
  title     = {Data-driven learning of symbolic constraints for a log-linear model in a phonological setting},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2217--2226},
  abstract  = {We propose a non-parametric Bayesian model for learning and weighting
	symbolically-defined constraints to populate a log-linear model.  The model
	jointly infers a vector of binary constraint values for each candidate output
	and likely definitions for these constraints, combining observations of the
	output classes with a (potentially infinite) grammar over potential constraint
	definitions.  We present results on a small morphophonological system, English
	regular plurals, as a test case.  The inferred constraints, based on a grammar
	of articulatory features, perform as well as theoretically-defined constraints
	on both observed and novel forms of English regular plurals. The learned
	constraint values and definitions also closely resemble standard constraints
	defined within phonological theory.},
  url       = {http://aclweb.org/anthology/C16-1209}
}

@InProceedings{huang-yang-chen:2016:COLING,
  author    = {Huang, Hen-Hsen  and  Yang, Chang-Rui  and  Chen, Hsin-Hsi},
  title     = {Chinese Tense Labelling and Causal Analysis},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2227--2237},
  abstract  = {This paper explores the role of tense information in Chinese causal analysis.
	Both tasks of causal type classification and causal directionality
	identification are experimented to show the significant improvement gained from
	tense features. To automatically extract the tense features, a Chinese tense
	predictor is proposed. Based on a large amount of parallel data, our
	semi-supervised approach improves the dependency-based convolutional neural
	network (DCNN) models for Chinese tense labelling and thus the causal analysis.},
  url       = {http://aclweb.org/anthology/C16-1210}
}

@InProceedings{kai-EtAl:2016:COLING,
  author    = {Kai, Yang  and  Yi, Cai  and  Zhenhong, Chen  and  Ho-fung, Leung  and  Raymond, Lau},
  title     = {Exploring Topic Discriminating Power of Words in Latent Dirichlet Allocation},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2238--2247},
  abstract  = {Latent Dirichlet Allocation (LDA) and its variants have been widely used to
	discover latent topics in textual documents. However, some of the topics
	generated by LDA may be noisy, with irrelevant words scattered across these
	topics. We refer to such words as topic-indiscriminate words, which tend
	to make topics more ambiguous and less interpretable by humans. In our work,
	we propose a new topic model named TWLDA, which assigns low weights to words
	with low topic discriminating power (ability). Our experimental results show
	that the proposed approach, which effectively reduces the number of
	topic-indiscriminate words in discovered topics, improves the effectiveness
	of LDA.},
  url       = {http://aclweb.org/anthology/C16-1211}
}

@InProceedings{zhao-huang-ma:2016:COLING,
  author    = {Zhao, Kai  and  Huang, Liang  and  Ma, Mingbo},
  title     = {Textual Entailment with Structured Attentions and Composition},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2248--2258},
  abstract  = {Deep learning techniques are increasingly popular in the textual entailment
	task, overcoming the fragility of traditional discrete models with hard
	alignments and logics. In particular, the recently proposed attention models
	(Rocktäschel et al., 2015; Wang and Jiang, 2015) achieve state-of-the-art
	accuracy by computing soft word alignments between the premise and hypothesis
	sentences. However, there remains a major limitation: this line of work
	completely ignores syntax and recursion, which is helpful in many traditional
	efforts. We show that it is beneficial to extend the attention model to tree
	nodes between premise and hypothesis. More importantly, this subtree-level
	attention reveals information about entailment relation. We study the recursive
	composition of this subtree-level entailment relation, which can be viewed as a
	soft version of the Natural Logic framework (MacCartney and Manning, 2009).
	Experiments show that our structured attention and entailment composition model
	can correctly identify and infer entailment relations from the bottom up, and
	bring significant improvements in accuracy.},
  url       = {http://aclweb.org/anthology/C16-1212}
}

@InProceedings{maziarz-EtAl:2016:COLING,
  author    = {Maziarz, Marek  and  Piasecki, Maciej  and  Rudnicka, Ewa  and  Szpakowicz, Stan  and  Kędzia, Pawe{\l}},
  title     = {plWordNet 3.0 -- a Comprehensive Lexical-Semantic Resource},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2259--2268},
  abstract  = {We have released plWordNet 3.0, a very large wordnet for Polish. In addition to
	what is expected in wordnets -- richly interrelated synsets -- it contains
	sentiment and emotion annotations, a large set of multi-word expressions, and a
	mapping onto WordNet 3.1. Part of the release is enWordNet 1.0, a substantially
	enlarged copy of WordNet 3.1, with material added to allow for a more complete
	mapping. The paper discusses the design principles of plWordNet, its content,
	its statistical portrait, a comparison with similar resources, and a partial
	list of applications.},
  url       = {http://aclweb.org/anthology/C16-1213}
}

@InProceedings{londhe-srihari-gopalakrishnan:2016:COLING,
  author    = {Londhe, Nikhil  and  Srihari, Rohini  and  Gopalakrishnan, Vishrawas},
  title     = {Time-Independent and Language-Independent Extraction of Multiword Expressions From Twitter},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2269--2278},
  abstract  = {Multiword Expressions (MWEs) are crucial lexico-semantic units in any language.
	However, most work on MWEs has been focused on standard monolingual corpora. In
	this work, we examine MWE usage on Twitter - an inherently multilingual medium
	with an extremely short average text length that is often replete with
	grammatical errors. We present a new graph-based, language-agnostic
	method for automatically extracting MWEs from tweets. We show how our
	method outperforms standard Association Measures. We also present a novel
	unsupervised evaluation technique to ascertain the accuracy of MWE extraction.},
  url       = {http://aclweb.org/anthology/C16-1214}
}

@InProceedings{judea-strube:2016:COLING,
  author    = {Judea, Alex  and  Strube, Michael},
  title     = {Incremental Global Event Extraction},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2279--2289},
  abstract  = {Event extraction is a difficult information extraction task. Li et al. (2014)
	explore the benefits of modeling event extraction and two related tasks, entity
	mention and relation extraction, jointly. This joint system achieves
	state-of-the-art performance in all tasks.  However, as a system operating only
	at the sentence level, it misses valuable information from other parts of the
	document. In this paper, we present an incremental easy-first approach to make
	the global context of the entire document available to the intra-sentential,
	state-of-the-art event extractor. We show that our method robustly increases
	performance on two datasets, namely ACE 2005 and TAC 2015.},
  url       = {http://aclweb.org/anthology/C16-1215}
}

@InProceedings{xu-EtAl:2016:COLING2,
  author    = {Xu, Jiaming  and  Shi, Jing  and  Yao, Yiqun  and  Zheng, Suncong  and  Xu, Bo  and  Xu, Bo},
  title     = {Hierarchical Memory Networks for Answer Selection on Unknown Words},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2290--2299},
  abstract  = {Recently, end-to-end memory networks have shown promising results on the Question
	Answering task; they encode past facts into an explicit memory and perform
	reasoning by making multiple computational steps on the memory.
	However, memory networks conduct the reasoning on sentence-level memory to
	output coarse semantic vectors and do not further apply any attention mechanism
	to focus on words, which may lead the model to lose some detailed information,
	especially when the answers are rare or unknown words. In this paper, we
	propose a novel Hierarchical Memory Network, dubbed HMN. First, we encode the
	past facts into sentence-level memory and word-level memory respectively. Then,
	\(k\)-max pooling is exploited after the reasoning module on the sentence-level
	memory to sample the \(k\) sentences most relevant to a question and feed these
	sentences into an attention mechanism on the word-level memory to focus on the words
	in the selected sentences. Finally, the prediction is jointly learned over the
	outputs of the sentence-level reasoning module and the word-level attention
	mechanism. The experimental results demonstrate that our approach successfully
	conducts answer selection on unknown words and achieves a better performance
	than memory networks.},
  url       = {http://aclweb.org/anthology/C16-1216}
}

@InProceedings{gupta-EtAl:2016:COLING2,
  author    = {Gupta, Amit  and  Piccinno, Francesco  and  Kozhevnikov, Mikhail  and  Pasca, Marius  and  Pighin, Daniele},
  title     = {Revisiting Taxonomy Induction over Wikipedia},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2300--2309},
  abstract  = {Guided by multiple heuristics, a unified taxonomy of entities and categories is
	distilled from the Wikipedia category network. A comprehensive evaluation,
	based on the analysis of upward generalization paths,  demonstrates that the
	taxonomy supports generalizations which are more than twice as accurate as the
	state of the art. The taxonomy is available at http://headstaxonomy.com.},
  url       = {http://aclweb.org/anthology/C16-1217}
}

@InProceedings{nguyen-EtAl:2016:COLING,
  author    = {Nguyen, Thien Huu  and  Fauceglia, Nicolas  and  Rodriguez Muro, Mariano  and  Hassanzadeh, Oktie  and  Gliozzo, Alfio Massimiliano  and  Sadoghi, Mohammad},
  title     = {Joint Learning of Local and Global Features for Entity Linking via Neural Networks},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2310--2320},
  abstract  = {Previous studies have highlighted the necessity for entity linking systems to
	capture the local entity-mention similarities and the global topical coherence.
	We introduce a novel framework based on convolutional neural networks and
	recurrent neural networks to simultaneously model the local and global features
	for entity linking. The proposed model benefits from the capacity of
	convolutional neural networks to induce the underlying representations for
	local contexts and the advantage of recurrent neural networks to adaptively
	compress variable length sequences of predictions for global constraints. Our
	evaluation on multiple datasets demonstrates the effectiveness of the model and
	yields the state-of-the-art performance on such datasets. In addition, we
	examine the entity linking systems on the domain adaptation setting that
	further demonstrates the cross-domain robustness of the proposed model.},
  url       = {http://aclweb.org/anthology/C16-1218}
}

@InProceedings{gunes-furche-orsi:2016:COLING,
  author    = {Gunes, Omer  and  Furche, Tim  and  Orsi, Giorgio},
  title     = {Structured Aspect Extraction},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2321--2332},
  abstract  = {Aspect extraction identifies relevant features from a textual description of an
	entity, e.g., a phone, and is typically targeted to product descriptions,
	reviews, and other short texts as an enabling task for, e.g., opinion mining
	and information retrieval.
	Current aspect extraction methods mostly focus on aspect terms and often
	neglect interesting modifiers of the term or embed them in the aspect term
	without proper distinction. Moreover, flat syntactic structures are often
	assumed, resulting in inaccurate extractions of complex aspects.
	This paper studies the problem of structured aspect extraction, a variant of
	traditional aspect extraction aiming at a fine-grained extraction of complex
	(i.e., hierarchical) aspects.
	We propose an unsupervised and scalable method for structured aspect extraction
	consisting of statistical noun phrase clustering, cPMI-based noun phrase
	segmentation, and hierarchical pattern induction.
	Our evaluation shows a substantial improvement over existing methods in terms
	of both quality and computational efficiency.},
  url       = {http://aclweb.org/anthology/C16-1219}
}

@InProceedings{baker-kiela-korhonen:2016:COLING,
  author    = {Baker, Simon  and  Kiela, Douwe  and  Korhonen, Anna},
  title     = {Robust Text Classification for Sparsely Labelled Data Using Multi-level Embeddings},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2333--2343},
  abstract  = {The conventional solution for handling sparsely labelled data is extensive
	feature engineering. This is time consuming and task and domain specific. We
	present a novel approach for learning embedded features that aims to alleviate
	this problem. Our approach jointly learns embeddings at different levels of
	granularity (word, sentence and document) along with the class labels. The
	intuition is that topic semantics represented by embeddings at multiple levels
	results in better classification. We evaluate this approach in unsupervised and
	semi-supervised settings on two sparsely labelled classification tasks,
	outperforming the handcrafted models and several embedding baselines.},
  url       = {http://aclweb.org/anthology/C16-1220}
}

@InProceedings{stathopoulos-teufel:2016:COLING,
  author    = {Stathopoulos, Yiannos  and  Teufel, Simone},
  title     = {Mathematical Information Retrieval based on Type Embeddings and Query Expansion},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2344--2355},
  abstract  = {We present an approach to mathematical information retrieval (MIR) that
	exploits a special kind of technical terminology, referred to as a
	mathematical type. In this paper, we present and evaluate a type
	detection mechanism and show its positive effect on the retrieval of
	research-level mathematics. Our best model, which performs query expansion with
	a type-aware embedding space, strongly outperforms standard IR models with
	state-of-the-art query expansion (vector space-based and language
	modelling-based), on a relatively new corpus of research-level queries.},
  url       = {http://aclweb.org/anthology/C16-1221}
}

@InProceedings{sneiders:2016:COLING,
  author    = {Sneiders, Eriks},
  title     = {Text Retrieval by Term Co-occurrences in a Query-based Vector Space},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2356--2365},
  abstract  = {Term co-occurrence in a sentence or paragraph is a powerful and often
	overlooked feature for text matching in document retrieval. In our experiments
	with matching email-style query messages to webpages, such term co-occurrence
	helped greatly to filter and rank documents, compared to matching document-size
	bags-of-words. The paper presents the results of the experiments as well as a
	text-matching model where the query shapes the vector space, a document is
	modelled by two or three vectors in this vector space, and the query-document
	similarity score depends on the length of the vectors and the relationships
	between them.},
  url       = {http://aclweb.org/anthology/C16-1222}
}

@InProceedings{yu-jiang:2016:COLING,
  author    = {Yu, Jianfei  and  Jiang, Jing},
  title     = {Pairwise Relation Classification with Mirror Instances and a Combined Convolutional Neural Network},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2366--2377},
  abstract  = {Relation classification is the task of classifying the semantic relations
	between entity pairs in text. Observing that existing work has not fully
	explored using different representations for relation instances, especially in
	order to better handle the asymmetry of relation types, in this paper, we
	propose a neural network based method for relation classification that combines
	the raw sequence and the shortest dependency path representations of relation
	instances and uses mirror instances to perform pairwise relation
	classification. We evaluate our proposed models on the SemEval-2010 Task 8
	dataset. The empirical results show that with two additional features, our
	model achieves the state-of-the-art result of F1 score of 85.7.},
  url       = {http://aclweb.org/anthology/C16-1223}
}

@InProceedings{wang-tan-han:2016:COLING,
  author    = {Wang, Lidan  and  Tan, Ming  and  Han, Jiawei},
  title     = {FastHybrid: A Hybrid Model for Efficient Answer Selection},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2378--2388},
  abstract  = {Answer selection is a core component in any question-answering systems. It aims
	to select correct answer sentences for a given question from a pool of
	candidate sentences. In recent years, many deep learning methods have been
	proposed and shown excellent results for this task. However, these methods
	typically require extensive parameter (and hyper-parameter) tuning, which gives
	rise to efficiency issues for large-scale datasets and potentially makes them
	less portable across new datasets and domains (as re-tuning is usually
	required). In this paper, we propose an extremely efficient hybrid model
	(FastHybrid) that tackles the problem from both an accuracy and scalability
	point of view. FastHybrid is a light-weight model that requires little tuning
	and adaptation across different domains. It combines a fast deep model (which
	will be introduced in the method section) with an initial information retrieval
	model to effectively and efficiently handle answer selection. We introduce a
	new efficient attention mechanism in the hybrid model and demonstrate its
	effectiveness on several QA datasets. Experimental results show that although
	the hybrid uses no training data, its accuracy is often on-par with supervised
	deep learning techniques, while significantly reducing training and tuning
	costs across different domains.},
  url       = {http://aclweb.org/anthology/C16-1224}
}

@InProceedings{kim-lee:2016:COLING,
  author    = {Kim, Bogyum  and  Lee, Jae Sung},
  title     = {Extracting Spatial Entities and Relations in Korean Text},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2389--2396},
  abstract  = {A spatial information extraction system retrieves spatial entities and their
	relationships for geological searches and reasoning. Spatial information
	systems have been developed mainly for English text, e.g., through the
	SpaceEval competition. Some of the techniques are useful but not directly
	applicable to Korean text, because of linguistic differences and the lack of
	language resources. In this paper, we propose a Korean spatial entity
	extraction model and a spatial relation extraction model; the spatial entity
	extraction model uses word vectors to alleviate over-generation and the
	spatial relation extraction model uses dependency parse labels to find the
	proper arguments in relations. Experiments with Korean text show that the two
	models are effective for spatial information extraction.},
  url       = {http://aclweb.org/anthology/C16-1225}
}

@InProceedings{xu-EtAl:2016:COLING3,
  author    = {xu, kun  and  Feng, Yansong  and  Huang, Songfang  and  Zhao, Dongyan},
  title     = {Hybrid Question Answering over Knowledge Base and Free Text},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2397--2407},
  abstract  = {A recent trend in question answering (QA) systems focuses on using structured
	knowledge bases (KBs) to find answers. While these systems are able to provide
	more precise answers than information retrieval (IR) based QA systems, the
	natural incompleteness of KBs inevitably limits the question scope that the
	system can answer. In this paper, we present a hybrid question answering
	(hybrid-QA) system which exploits both structured knowledge base and free text
	to answer a question.
	The main challenge is to recognize the meaning of a question using these two
	resources, i.e., structured KB and free text. To address this, we map
	relational phrases to KB predicates and textual relations simultaneously, and
	further develop an integer linear program (ILP) model to infer on these
	candidates and provide a globally optimal solution.
	Experiments on benchmark datasets show that our system can benefit from both
	structured KB and free text, outperforming the state-of-the-art systems.},
  url       = {http://aclweb.org/anthology/C16-1226}
}

@InProceedings{shen-liu:2016:COLING,
  author    = {Shen, Jie  and  Liu, Cong},
  title     = {Improved Word Embeddings with Implicit Structure Information},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2408--2417},
  abstract  = {Distributed word representation is an efficient method for capturing semantic
	and syntactic word relations. In this work, we introduce an extension to the
	continuous bag-of-words model for learning word representations efficiently by
	using implicit structure information. Instead of relying on a syntactic parser
	which might be noisy and slow to build, we compute weights representing
	probabilities of syntactic relations based on the Huffman softmax tree in an
	efficient heuristic. The constructed “implicit graphs” from these weights
	show that these weights contain useful implicit structure information.
	Extensive experiments performed on several word similarity and word analogy
	tasks show gains compared to the basic continuous bag-of-words model.},
  url       = {http://aclweb.org/anthology/C16-1227}
}

@InProceedings{dahou-EtAl:2016:COLING,
  author    = {Dahou, Abdelghani  and  Xiong, Shengwu  and  Zhou, Junwei  and  Haddoud, Mohamed Houcine  and  Duan, Pengfei},
  title     = {Word Embeddings and Convolutional Neural Network for Arabic Sentiment Classification},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2418--2427},
  abstract  = {With the development and the advancement of social networks, forums, blogs and
	online sales, a growing number of Arabs are expressing their opinions on the
	web. In this paper, a scheme of Arabic sentiment classification, which
	evaluates and detects the sentiment polarity from Arabic reviews and Arabic
	social media, is studied. We investigated several architectures for building
	quality neural word embeddings using a 3.4-billion-word corpus drawn from a
	collected 10-billion-word web-crawled corpus. Moreover, a convolutional neural
	network trained on top of pre-trained Arabic word embeddings is used for
	sentiment classification to evaluate the quality of these word embeddings. The
	simulation results show that the proposed scheme outperforms existing
	methods on 4 out of 5 balanced and unbalanced datasets.},
  url       = {http://aclweb.org/anthology/C16-1228}
}

@InProceedings{wang-jiang-luo:2016:COLING,
  author    = {Wang, Xingyou  and  Jiang, Weijie  and  Luo, Zhiyong},
  title     = {Combination of Convolutional and Recurrent Neural Network for Sentiment Analysis of Short Texts},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2428--2437},
  abstract  = {Sentiment analysis of short texts is challenging because of the limited
	contextual information they usually contain. In recent years, deep learning
	models such as convolutional neural networks (CNNs) and recurrent neural
	networks (RNNs) have been applied to text sentiment analysis with comparatively
	remarkable results. In this paper, we describe a joint CNN and RNN
	architecture, taking advantage of the coarse-grained local features generated
	by the CNN and the long-distance dependencies learned via the RNN for sentiment
	analysis of short texts. Experimental results show a clear improvement over the
	state of the art on three benchmark corpora, MR, SST1 and SST2, with 82.28%,
	51.50% and 89.95% accuracy, respectively.},
  url       = {http://aclweb.org/anthology/C16-1229}
}

@InProceedings{zubiaga-EtAl:2016:COLING,
  author    = {Zubiaga, Arkaitz  and  Kochkina, Elena  and  Liakata, Maria  and  Procter, Rob  and  Lukasik, Michal},
  title     = {Stance Classification in Rumours as a Sequential Task Exploiting the Tree Structure of Social Media Conversations},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2438--2448},
  abstract  = {Rumour stance classification, the task that determines if each tweet in a
	collection discussing a rumour is supporting, denying, questioning or simply
	commenting on the rumour, has been attracting substantial interest. Here we
	introduce a novel approach that makes use of the sequence of transitions
	observed in tree-structured conversation threads in Twitter. The conversation
	threads are formed by harvesting users' replies to one another, which results
	in a nested tree-like structure. Previous work addressing the stance
	classification task has treated each tweet as a separate unit. Here we analyse
	tweets by virtue of their position in a sequence and test two sequential
	classifiers, Linear-Chain CRF and Tree CRF, each of which makes different
	assumptions about the conversational structure. We experiment with eight
	Twitter datasets, collected during breaking news, and show that exploiting the
	sequential structure of Twitter conversations achieves significant improvements
	over the non-sequential methods. Our work is the first to model Twitter
	conversations as a tree structure in this manner, introducing a novel way of
	tackling NLP tasks on Twitter conversations.},
  url       = {http://aclweb.org/anthology/C16-1230}
}

@InProceedings{zhang-zhang-fu:2016:COLING,
  author    = {Zhang, Meishan  and  Zhang, Yue  and  Fu, Guohong},
  title     = {Tweet Sarcasm Detection Using Deep Neural Network},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2449--2460},
  abstract  = {Sarcasm detection has been modeled as a binary document classification task,
	with rich features being defined manually over input documents.
	Traditional models employ discrete manual features to address the task,
	with much research effort being devoted to the design of effective feature
	templates.
	We investigate the use of neural networks for tweet sarcasm detection,
	and compare the effects of continuous automatic features with discrete
	manual features.
	In particular, we use a bi-directional gated recurrent neural network to
	capture syntactic and semantic information over tweets locally,
	and a pooling neural network to extract contextual features automatically from
	history tweets.
	Results show that neural features give improved accuracies for sarcasm
	detection,
	with different error distributions compared with discrete manual features.},
  url       = {http://aclweb.org/anthology/C16-1231}
}

@InProceedings{menini-tonelli:2016:COLING,
  author    = {Menini, Stefano  and  Tonelli, Sara},
  title     = {Agreement and Disagreement: Comparison of Points of View in the Political Domain},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2461--2470},
  abstract  = {The automated comparison of points of view between two politicians is a very
	challenging task, due not only to the lack of annotated resources, but also to
	the different dimensions contributing to the definition of agreement and
	disagreement.
	In order to shed light on this complex task, we first carry out a pilot study
	to manually annotate the components involved in detecting agreement and
	disagreement. Then, based on these findings, we implement different features to
	capture them automatically via supervised classification.  We do not focus on
	debates in dialogical form, but we rather consider sets of documents, in which
	politicians may express their position with respect to different topics in an
	implicit or explicit way, like during an electoral campaign. We create and make
	available three different datasets.},
  url       = {http://aclweb.org/anthology/C16-1232}
}

@InProceedings{welch-mihalcea:2016:COLING,
  author    = {Welch, Charles  and  Mihalcea, Rada},
  title     = {Targeted Sentiment to Understand Student Comments},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2471--2481},
  abstract  = {We address the task of targeted sentiment as a means of understanding the
	sentiment that students hold toward courses and instructors, as expressed by
	students in their comments. We introduce a new dataset consisting of student
	comments annotated for targeted sentiment and describe a system that can both
	identify the courses and instructors mentioned in student comments and
	label the students' sentiment toward those entities. Through several
	comparative evaluations, we show that our system outperforms previous work on a
	similar task.},
  url       = {http://aclweb.org/anthology/C16-1233}
}

@InProceedings{joshi-EtAl:2016:COLING,
  author    = {Joshi, Aditya  and  Prabhu, Ameya  and  Shrivastava, Manish  and  Varma, Vasudeva},
  title     = {Towards Sub-Word Level Compositions for Sentiment Analysis of Hindi-English Code Mixed Text},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2482--2491},
  abstract  = {Sentiment analysis (SA) using code-mixed data from social media has several
	applications in opinion mining ranging from customer satisfaction to social
	campaign analysis in multilingual societies. Advances in this area are impeded
	by the lack of a suitable annotated dataset. We introduce a Hindi-English
	(Hi-En) code-mixed dataset for sentiment analysis and perform empirical
	analysis comparing the suitability and performance of various state-of-the-art
	SA methods in social media.  
	In this paper, we introduce learning sub-word level representations in our LSTM
	(Subword-LSTM) architecture instead of character-level or word-level
	representations. This linguistic prior in our architecture enables us to learn
	information about the sentiment value of important morphemes. It also seems
	to work well on highly noisy text containing misspellings, as shown in our
	experiments and demonstrated in the morpheme-level feature maps learned by our
	model. We further hypothesize that encoding this linguistic prior in the
	Subword-LSTM architecture leads to its superior performance. Our system attains
	accuracy 4-5% greater than traditional approaches on our dataset, and also
	outperforms the available system for sentiment analysis in Hi-En code-mixed
	text by 18%.},
  url       = {http://aclweb.org/anthology/C16-1234}
}

@InProceedings{xiong-EtAl:2016:COLING,
  author    = {Xiong, Shufeng  and  Zhang, Yue  and  Ji, Donghong  and  Lou, Yinxia},
  title     = {Distance Metric Learning for Aspect Phrase Grouping},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2492--2502},
  abstract  = {Aspect phrase grouping is an important task in aspect-level sentiment analysis.
	It is a challenging problem due to polysemy and context dependency. We propose
	an Attention-based Deep Distance Metric Learning (ADDML) method, by considering
	aspect phrase representation as well as context representation. First,
	leveraging the characteristics of the review text, we automatically generate
	aspect phrase sample pairs for distant supervision. Second, we feed word
	embeddings of aspect phrases and their contexts into an attention-based neural
	network to learn feature representations of contexts. Both aspect phrase
	embeddings and context embeddings are used to learn a deep feature subspace for
	measuring the distances between aspect phrases for K-means clustering.
	Experiments on four review datasets show that the proposed method outperforms
	state-of-the-art strong baseline methods.},
  url       = {http://aclweb.org/anthology/C16-1235}
}

@InProceedings{bao-EtAl:2016:COLING,
  author    = {Bao, Junwei  and  Duan, Nan  and  Yan, Zhao  and  Zhou, Ming  and  Zhao, Tiejun},
  title     = {Constraint-Based Question Answering with Knowledge Graph},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2503--2514},
  abstract  = {WebQuestions and SimpleQuestions are two benchmark data-sets commonly used in
	recent knowledge-based question answering (KBQA) work.
	Most questions in them are `simple' questions which can be answered based on a
	single relation in the knowledge base.
	Such data-sets lack the capability of evaluating KBQA systems on complicated
	questions.
	Motivated by this issue, we release a new data-set, namely ComplexQuestions,
	aiming to measure the quality of KBQA systems on `multi-constraint' questions
	which require multiple knowledge base relations to get the answer.
	Besides, we propose a novel systematic KBQA approach to solve multi-constraint
	questions.
	Compared to state-of-the-art methods, our approach not only obtains comparable
	results on the two existing benchmark data-sets, but also achieves significant
	improvements on ComplexQuestions.},
  url       = {http://aclweb.org/anthology/C16-1236}
}

@InProceedings{barroncedeno-EtAl:2016:COLING,
  author    = {Barr\'{o}n-Cede\~{n}o, Alberto  and  Da San Martino, Giovanni  and  Romeo, Salvatore  and  Moschitti, Alessandro},
  title     = {Selecting Sentences versus Selecting Tree Constituents for Automatic Question Ranking},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2515--2525},
  abstract  = {Community question answering (cQA) websites are focused on users who post
	questions on an online forum, expecting other users to provide them
	answers or suggestions. Unlike other social media, the length of the posted
	queries has no limit, and queries tend to be multi-sentence elaborations
	combining context, actual questions, and irrelevant information. We approach
	the problem of question ranking: given a user's new question, retrieve those
	previously-posted questions which could be equivalent, or highly relevant. This
	could prevent the posting of nearly-duplicate questions and provide the user
	with instantaneous answers. For the first time in cQA, we address the selection
	of relevant text, both at sentence and at constituent level, for parse
	tree-based representations. Our supervised models for text selection boost the
	performance of a tree kernel-based machine learning model, allowing it to
	overtake the current state of the art on a recently released cQA evaluation
	framework.},
  url       = {http://aclweb.org/anthology/C16-1237}
}

@InProceedings{shen-huang:2016:COLING,
  author    = {Shen, Yatian  and  Huang, Xuanjing},
  title     = {Attention-Based Convolutional Neural Network for Semantic Relation Extraction},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2526--2536},
  abstract  = {Nowadays, neural networks play an important role in the task of relation
	classification. In this paper, we propose a novel attention-based convolutional
	neural network architecture for this task. Our model makes full use of word
	embedding, part-of-speech tag embedding and position embedding information.
	A word-level attention mechanism is able to better determine which parts of the
	sentence are most influential with respect to the two entities of interest.
	This architecture enables learning some important features from task-specific
	labeled data, forgoing the need for external knowledge such as explicit
	dependency structures. Experiments on the SemEval-2010 Task 8 benchmark dataset
	show that our model achieves better performance than several state-of-the-art
	neural network models and can achieve competitive performance with minimal
	feature engineering.},
  url       = {http://aclweb.org/anthology/C16-1238}
}

@InProceedings{gupta-schutze-andrassy:2016:COLING,
  author    = {Gupta, Pankaj  and  Sch\"{u}tze, Hinrich  and  Andrassy, Bernt},
  title     = {Table Filling Multi-Task Recurrent Neural Network for Joint Entity and Relation Extraction},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2537--2547},
  abstract  = {This paper proposes a novel context-aware joint entity and word-level relation
	extraction approach through semantic composition of words, introducing a Table
	Filling Multi-Task Recurrent Neural Network (TF-MTRNN) model that reduces the
	entity recognition and relation classification tasks to a table-filling problem
	and models their interdependencies. The proposed neural
	network architecture is capable of modeling multiple relation instances without
	knowing the corresponding relation arguments in a sentence. The experimental
	results show that a simple approach of piggybacking candidate entities to model
	the label dependencies from relations to entities improves performance.
	We present state-of-the-art results with improvements of 2.0% and 2.7% for
	entity recognition and relation classification, respectively, on the CoNLL04
	dataset.},
  url       = {http://aclweb.org/anthology/C16-1239}
}

@InProceedings{zhang-EtAl:2016:COLING5,
  author    = {Zhang, Biao  and  Xiong, Deyi  and  Su, Jinsong  and  Duan, Hong  and  Zhang, Min},
  title     = {Bilingual Autoencoders with Global Descriptors for Modeling Parallel Sentences},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2548--2558},
  abstract  = {Parallel sentence representations are important for bilingual and cross-lingual
	tasks in natural language processing. In this paper, we explore a bilingual
	autoencoder approach to model parallel sentences. We extract sentence-level
	global descriptors (e.g. min, max) from word embeddings, and construct two
	monolingual autoencoders over these descriptors on the source and target
	languages. In order to tightly connect the two autoencoders with bilingual
	correspondences, we force them to share the same decoding parameters and
	minimize a corpus-level semantic distance between the two languages. Being
	optimized towards a joint objective function of reconstruction and semantic
	errors, our bilingual autoencoder is able to learn continuous-valued latent
	representations for parallel sentences. Experiments on both intrinsic and
	extrinsic evaluations on statistical machine translation tasks show that our
	autoencoder achieves substantial improvements over the baselines.},
  url       = {http://aclweb.org/anthology/C16-1240}
}

@InProceedings{pal-naskar-vangenabith:2016:COLING,
  author    = {Pal, Santanu  and  Naskar, Sudip Kumar  and  van Genabith, Josef},
  title     = {Multi-Engine and Multi-Alignment Based Automatic Post-Editing and its Impact on Translation Productivity},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2559--2570},
  abstract  = {In this paper we combine two strands of machine translation (MT) research:
	automatic post-editing (APE) and multi-engine (system combination) MT. APE
	systems learn a target-language-side second-stage MT system from the data
	produced by human-corrected output of a first-stage MT system, to improve the
	output of the first-stage MT in what is essentially a sequential MT system
	combination architecture. At the same time, there is a rich research literature
	on parallel MT system combination where the same input is fed to multiple
	engines and the best output is selected or smaller sections of the outputs are
	combined to obtain improved translation output. In the paper we show that
	parallel system combination in the APE stage of a sequential MT-APE combination
	yields substantial translation improvements both measured in terms of automatic
	evaluation metrics as well as in terms of productivity improvements measured in
	a post-editing experiment. We also show that system combination on the level of
	APE alignments yields further improvements. Overall our APE system yields
	statistically significant improvement of 5.9% relative BLEU over a strong
	baseline (English--Italian Google MT) and 21.76% productivity increase in a
	human post-editing experiment with professional translators.},
  url       = {http://aclweb.org/anthology/C16-1241}
}

@InProceedings{vanderwees-bisazza-monz:2016:COLING,
  author    = {van der Wees, Marlies  and  Bisazza, Arianna  and  Monz, Christof},
  title     = {Measuring the Effect of Conversational Aspects on Machine Translation Quality},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2571--2581},
  abstract  = {Research in statistical machine translation (SMT) is largely driven by formal
	translation tasks, while translating informal text is much more challenging. In
	this paper we focus on SMT for the informal genre of dialogues, which has
	rarely been addressed to date. Concretely, we investigate the effect of
	dialogue acts, speakers, gender, and text register on SMT quality when
	translating fictional dialogues. We first create and release a corpus of
	multilingual movie dialogues annotated with these four dialogue-specific
	aspects. When measuring translation performance for each of these variables, we
	find that BLEU fluctuations between their categories are often significantly
	larger than randomly expected. Following this finding, we hypothesize and show
	that SMT of fictional dialogues benefits from adaptation towards dialogue acts
	and registers. Finally, we find that male speakers are harder to translate and
	use more vulgar language than female speakers, and that vulgarity is often not
	preserved during translation.},
  url       = {http://aclweb.org/anthology/C16-1242}
}

@InProceedings{passban-liu-way:2016:COLING,
  author    = {Passban, Peyman  and  Liu, Qun  and  Way, Andy},
  title     = {Enriching Phrase Tables for Statistical Machine Translation Using Mixed Embeddings},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2582--2591},
  abstract  = {The phrase table is considered to be the main bilingual resource for the
	phrase-based statistical machine translation (PBSMT) model. During translation,
	a source sentence is decomposed into several phrases. The best match of each
	source phrase is selected among several target-side counterparts within the
	phrase table, and processed by the decoder to generate a sentence-level
	translation. The best match is chosen according to several factors, including a
	set of bilingual features. PBSMT engines by default provide four probability
	scores in phrase tables which are considered as the main set of bilingual
	features. Our goal is to enrich that set of features, as a better feature set
	should yield better translations. We propose new scores generated by a
	Convolutional Neural Network (CNN) which indicate the semantic relatedness of
	phrase pairs. We evaluate our model in different experimental settings with
	different language pairs. We observe significant improvements when the proposed
	features are incorporated into the PBSMT pipeline.},
  url       = {http://aclweb.org/anthology/C16-1243}
}

@InProceedings{song-EtAl:2016:COLING2,
  author    = {Song, Wei  and  Fu, Ruiji  and  Liu, Lizhen  and  Wang, Hanshi  and  Liu, Ting},
  title     = {Anecdote Recognition and Recommendation},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2592--2602},
  abstract  = {We introduce a novel task, Anecdote Recognition and Recommendation. An anecdote
	is a story with a point, giving a revealing account of an individual person.
	Properly recommended anecdotes can be used as evidence to support argumentative
	writing or as a clue for further reading.
	We represent an anecdote as a structured tuple <person, story, implication>.
	Anecdote recognition runs on archived argumentative essays. We extract
	narratives containing events of a person as the anecdote story. More
	importantly, we uncover the anecdote implication, which reveals the meaning and
	topic of an anecdote. Our approach depends on discourse role identification.
	Discourse roles such as thesis, main ideas and support help us locate stories
	and their implications in essays. The experiments show that informative and
	interpretable anecdotes can be recognized. These anecdotes are used for
	anecdote recommendation. The anecdote recommender can recommend proper
	anecdotes in response to given topics. The anecdote implication contributes
	most to bridging users' topics of interest and relevant anecdotes.},
  url       = {http://aclweb.org/anthology/C16-1244}
}

@InProceedings{jiang-carenini-ng:2016:COLING,
  author    = {Jiang, Kailang  and  Carenini, Giuseppe  and  Ng, Raymond},
  title     = {Training Data Enrichment for Infrequent Discourse Relations},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2603--2614},
  abstract  = {Discourse parsing is a popular technique widely used in text understanding,
	sentiment analysis and other NLP tasks. However, for most discourse parsers,
	the performance varies significantly across different discourse relations. In
	this paper, we first validate the underfitting hypothesis, i.e., the less
	frequent a relation is in the training data, the poorer the performance on that
	relation. We then explore how to increase the number of positive training
	instances, without resorting to manually creating additional labeled data. We
	propose a training data enrichment framework that relies on co-training of two
	different discourse parsers on unlabeled documents. Importantly, we show that
	co-training alone is not sufficient. The framework requires a filtering step to
	ensure that only “good quality” unlabeled documents can be used for
	enrichment and re-training. We propose and evaluate two ways to perform the
	filtering. The first is to use an agreement score between the two parsers. The
	second is to use only the confidence score of the faster parser. Our empirical
	results show that agreement score can help to boost the performance on
	infrequent relations, and that the confidence score is a viable approximation
	of the agreement score for infrequent relations.},
  url       = {http://aclweb.org/anthology/C16-1245}
}

@InProceedings{zhang-litman-forbesriley:2016:COLING,
  author    = {Zhang, Fan  and  Litman, Diane  and  Forbes-Riley, Katherine},
  title     = {Inferring Discourse Relations from PDTB-style Discourse Labels for Argumentative Revision Classification},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2615--2624},
  abstract  = {Penn Discourse Treebank (PDTB)-style annotation focuses on labeling local
	discourse relations between text spans and typically ignores larger discourse
	contexts. In this paper we propose two approaches to infer discourse relations
	in a  paragraph-level context from annotated PDTB labels.  We investigate the
	utility of inferring such discourse information using the task of revision
	classification. Experimental results demonstrate that the inferred information
	can significantly improve classification performance compared to baselines, not
	only when PDTB annotation comes from humans but also from automatic parsers.},
  url       = {http://aclweb.org/anthology/C16-1246}
}

@InProceedings{kabbara-feng-cheung:2016:COLING,
  author    = {Kabbara, Jad  and  Feng, Yulan  and  Cheung, Jackie Chi Kit},
  title     = {Capturing Pragmatic Knowledge in Article Usage Prediction using LSTMs},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2625--2634},
  abstract  = {We examine the potential of recurrent neural networks for handling pragmatic
	inferences involving complex contextual cues for the task of article usage
	prediction. We train and compare several variants of Long Short-Term Memory
	(LSTM) networks with an attention mechanism. Our model outperforms a previous
	state-of-the-art system, achieving up to 96.63% accuracy on the WSJ/PTB corpus.
	In addition, we perform a series of analyses to understand the impact of
	various model choices. We find that the gain in performance can be attributed
	to the ability of LSTMs to pick up on contextual cues, both local and further
	away in distance, and that the model is able to solve cases involving reasoning
	about coreference and synonymy. We also show how the attention mechanism
	contributes to the interpretability of the model's effectiveness.},
  url       = {http://aclweb.org/anthology/C16-1247}
}

@InProceedings{pateria:2016:COLING,
  author    = {Pateria, Shubham},
  title     = {Aspect Based Sentiment Analysis using Sentiment Flow with Local and Non-local Neighbor Information},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2635--2646},
  abstract  = {Aspect-level analysis of sentiments contained in a review text is important to
	reveal a detailed picture of consumer opinions. While a plethora of methods
	have traditionally been employed for this task, the majority focus has been on
	analyzing only aspect-centered local information. However, incorporating
	context information from non-local aspect neighbors may capture richer
	structure in review text and enhance prediction. This may especially be helpful
	to resolve ambiguous predictions. The context around an aspect can be
	incorporated using semantic relations within text and inter-label dependencies
	in the output. On the output side, this becomes a structured prediction task.
	However, non-local label correlations are computationally heavy and intractable
	to infer for structured prediction models like Conditional Random Fields (CRF).
	Moreover, some prior intuition is required to incorporate non-local context.
	Thus, inspired by previous research on multi-stage prediction, we propose a
	two-level model for aspect-based analysis. The proposed model uses predicted
	probability estimates from first level to incorporate neighbor information in
	the second level. The model is evaluated on data taken from SemEval Workshops
	and Bing Liu's review collection. It shows comparatively better performance
	against a few existing methods. Overall, we obtain prediction accuracy in the
	range of 83-88\% and an increment of almost 3-4 points over baseline (first
	level only) scores.},
  url       = {http://aclweb.org/anthology/C16-1248}
}

@InProceedings{li-EtAl:2016:COLING8,
  author    = {Li, Shoushan  and  Xu, Jian  and  Zhang, Dong  and  Zhou, Guodong},
  title     = {Two-View Label Propagation to Semi-supervised Reader Emotion Classification},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2647--2655},
  abstract  = {In the literature, various supervised learning approaches have been adopted to
	address the task of reader emotion classification. However, the classification
	performance greatly suffers when the size of the labeled data is limited. In
	this paper, we propose a two-view label propagation approach to semi-supervised
	reader emotion classification by exploiting two views, namely source text and
	response text in a label propagation algorithm. Specifically, our approach
	depends on two word-document bipartite graphs to model the relationship among
	the samples in the two views respectively. Besides, the two bipartite graphs
	are integrated by linking each source text sample with its corresponding
	response text sample via a length-sensitive transition probability. In this
	way, our two-view label propagation approach to semi-supervised reader emotion
	classification largely alleviates the reliance on the strong sufficiency and
	independence assumptions of the two views, as required in co-training.
	Empirical evaluation demonstrates the effectiveness of our two-view label
	propagation approach to semi-supervised reader emotion classification.},
  url       = {http://aclweb.org/anthology/C16-1249}
}

@InProceedings{ebrahimi-dou-lowd:2016:COLING,
  author    = {Ebrahimi, Javid  and  Dou, Dejing  and  Lowd, Daniel},
  title     = {A Joint Sentiment-Target-Stance Model for Stance Classification in Tweets},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2656--2665},
  abstract  = {Classifying the stance expressed in online microblogging social media is an
	emerging problem in opinion mining. We propose a probabilistic approach to
	stance classification in tweets, which models stance, target of stance, and
	sentiment of tweet, jointly. Instead of simply conjoining the sentiment or
	target variables as extra variables to the feature space, we use a novel
	formulation to incorporate three-way interactions among sentiment-stance-input
	variables and three-way interactions among target-stance-input variables.
	The proposed specification intuitively aims to discriminate sentiment features
	from target features for stance classification.
	In addition, regularizing a single stance classifier, which handles all
	targets, acts as a soft weight-sharing among them. We demonstrate that
	discriminative training of this model achieves the state-of-the-art results in
	supervised stance classification, and its generative training obtains
	competitive results in the weakly supervised setting.},
  url       = {http://aclweb.org/anthology/C16-1250}
}

@InProceedings{cambria-EtAl:2016:COLING,
  author    = {Cambria, Erik  and  Poria, Soujanya  and  Bajpai, Rajiv  and  Schuller, Bjoern},
  title     = {SenticNet 4: A Semantic Resource for Sentiment Analysis Based on Conceptual Primitives},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2666--2677},
  abstract  = {An important difference between traditional AI systems and human intelligence
	is the human ability to harness commonsense knowledge gleaned from a lifetime
	of learning and experience to make informed decisions. This allows humans to
	adapt easily to novel situations where AI fails catastrophically due to a lack
	of situation-specific rules and generalization capabilities. Commonsense
	knowledge also provides background information that enables humans to
	successfully operate in social situations where such knowledge is typically
	assumed. Since commonsense consists of information that humans take for
	granted, gathering it is an extremely difficult task. Previous versions of
	SenticNet were focused on collecting this kind of knowledge for sentiment
	analysis but they were heavily limited by their inability to generalize.
	SenticNet 4 overcomes such limitations by leveraging conceptual primitives
	automatically generated by means of hierarchical clustering and dimensionality
	reduction.},
  url       = {http://aclweb.org/anthology/C16-1251}
}

@InProceedings{li-EtAl:2016:COLING9,
  author    = {Li, Yuezhang  and  Zheng, Ronghuo  and  Tian, Tian  and  Hu, Zhiting  and  Iyer, Rahul  and  Sycara, Katia},
  title     = {Joint Embedding of Hierarchical Categories and Entities for Concept Categorization and Dataless Classification},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2678--2688},
  abstract  = {Existing work learning distributed representations of knowledge base entities
	has largely failed to incorporate rich categorical structure, and is unable to
	induce category representations.
	We propose a new framework that embeds entities and categories jointly into a
	semantic space, by integrating structured knowledge and taxonomy hierarchy from
	large knowledge bases. Our framework makes it possible to compute meaningful
	semantic relatedness between entities and categories in a principled way, and
	can handle both single-word and multiple-word concepts. Our method shows significant
	improvement on the tasks of concept categorization and dataless hierarchical
	classification.},
  url       = {http://aclweb.org/anthology/C16-1252}
}

@InProceedings{jiang-EtAl:2016:COLING3,
  author    = {Jiang, Di  and  Shi, Lei  and  Lian, Rongzhong  and  Wu, Hua},
  title     = {Latent Topic Embedding},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2689--2698},
  abstract  = {Topic modeling and word embedding are two important techniques for deriving
	latent semantics from data. General-purpose topic models typically work in
	coarse granularity by capturing word co-occurrence at the document/sentence
	level. In contrast, word embedding models usually work in much finer
	granularity by modeling word co-occurrence within small sliding windows. With
	the aim of deriving latent semantics by considering word co-occurrence at
	different levels of granularity,  we propose a novel model named \textit{Latent
	Topic Embedding} (LTE), which seamlessly integrates topic generation and
	embedding learning in one unified framework. We further propose an efficient
	Monte Carlo EM algorithm to estimate the parameters of interest. By retaining
	the individual advantages of topic modeling and word embedding, LTE results in
	better latent topics and word embeddings. Extensive experiments verify the
	superiority of LTE over state-of-the-art methods.},
  url       = {http://aclweb.org/anthology/C16-1253}
}

@InProceedings{nguyen-schulteimwalde-vu:2016:COLING,
  author    = {Nguyen, Kim Anh  and  Schulte im Walde, Sabine  and  Vu, Ngoc Thang},
  title     = {Neural-based Noise Filtering from Word Embeddings},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2699--2707},
  abstract  = {Word embeddings have been demonstrated to benefit NLP tasks impressively. Yet,
	there is room for improvements in the vector representations, because current
	word embeddings typically contain unnecessary information, i.e., noise. We
	propose two novel models to improve word embeddings by unsupervised learning,
	in order to yield word denoising embeddings. The word denoising embeddings are
	obtained by strengthening salient information and weakening noise in the
	original word embeddings, based on a deep feed-forward neural network filter.
	Results from benchmark tasks show that the filtered word denoising embeddings
	outperform the original word embeddings.},
  url       = {http://aclweb.org/anthology/C16-1254}
}

@InProceedings{aga-EtAl:2016:COLING,
  author    = {Aga, Rosa Tsegaye  and  Drumond, Lucas  and  Wartena, Christian  and  Schmidt-Thieme, Lars},
  title     = {Integrating Distributional and Lexical Information for Semantic Classification of Words using MRMF},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2708--2717},
  abstract  = {Semantic classification of words using distributional features is usually based
	on the semantic similarity of words. We show on two different datasets that a
	trained classifier using the distributional features directly gives better
	results. We use Support Vector Machines (SVM) and Multi-relational Matrix
	Factorization (MRMF) to train classifiers. Both give similar results. However,
	MRMF, which has not previously been used for semantic classification with
	distributional features, can easily be extended with more matrices containing
	more information from different sources on the same problem. We demonstrate the
	effectiveness of the novel approach by including information from WordNet. Thus
	we show that MRMF provides an interesting approach for building semantic
	classifiers that (1) gives better results than unsupervised approaches based on
	vector similarity, (2) gives similar results as other supervised methods and
	(3) can naturally be extended with other sources of information in order to
	improve the results.},
  url       = {http://aclweb.org/anthology/C16-1255}
}

@InProceedings{gonen-goldberg:2016:COLING,
  author    = {Gonen, Hila  and  Goldberg, Yoav},
  title     = {Semi Supervised Preposition-Sense Disambiguation using Multilingual Data},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2718--2729},
  abstract  = {Prepositions are very common and very ambiguous, and understanding their sense
	is critical for understanding the meaning of the sentence. Supervised corpora
	for the preposition-sense disambiguation task are small, suggesting a
	semi-supervised approach to the task. We show that signals from unannotated
	multilingual data can be used to improve supervised preposition-sense
	disambiguation. Our approach pre-trains an LSTM encoder for predicting the
	translation of a preposition, and then incorporates the pre-trained encoder as
	a component in a supervised classification system, and fine-tunes it for the
	task. The multilingual signals consistently improve results on two
	preposition-sense datasets.},
  url       = {http://aclweb.org/anthology/C16-1256}
}

@InProceedings{vanhee-lefever-hoste:2016:COLING,
  author    = {Van Hee, Cynthia  and  Lefever, Els  and  Hoste, Veronique},
  title     = {Monday mornings are my fave :) \#not Exploring the Automatic Recognition of Irony in English tweets},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2730--2739},
  abstract  = {Recognising and understanding irony is crucial for the improvement of natural
	language processing tasks including sentiment analysis. In this study, we
	describe the construction of an English Twitter corpus and its annotation for
	irony based on a newly developed fine-grained annotation scheme. We also
	explore the feasibility of automatic irony recognition by exploiting a varied
	set of features including lexical, syntactic, sentiment and semantic (Word2Vec)
	information. Experiments on a held-out test set show that our irony classifier
	benefits from this combined information, yielding an F1-score of 67.66%. When
	explicit hashtag information like \#irony is included in the data, the system
	even obtains an F1-score of 92.77%. A qualitative analysis of the output
	reveals that recognising irony that results from a polarity clash appears to be
	(much) more feasible than recognising other forms of ironic utterances (e.g.,
	descriptions of situational irony).},
  url       = {http://aclweb.org/anthology/C16-1257}
}

@InProceedings{guggilla-miller-gurevych:2016:COLING,
  author    = {Guggilla, Chinnappa  and  Miller, Tristan  and  Gurevych, Iryna},
  title     = {CNN- and LSTM-based Claim Classification in Online User Comments},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2740--2751},
  abstract  = {When processing arguments in online user interactive discourse, it is often
	necessary to determine their bases of support. In this paper, we describe a
	supervised approach, based on deep neural networks, for classifying the claims
	made in online arguments. We conduct experiments using convolutional neural
	networks (CNNs) and long short-term memory networks (LSTMs) on two claim data
	sets compiled from online user comments. Using different types of
	distributional word embeddings, but without incorporating any rich, expensive
	set of features, we achieve a significant improvement over the state of the art
	for one data set (which categorizes arguments as factual vs. emotional), and
	performance comparable to the state of the art on the other data set (which
	categorizes propositions according to their verifiability). Our approach has
	the advantages of using a generalized, simple, and effective methodology that
	works for claim categorization on different data sets and tasks.},
  url       = {http://aclweb.org/anthology/C16-1258}
}

@InProceedings{peng-feldman:2016:COLING,
  author    = {Peng, Jing  and  Feldman, Anna},
  title     = {Experiments in Idiom Recognition},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2752--2761},
  abstract  = {Some expressions can be ambiguous between idiomatic and literal interpretations
	depending on the context they occur in, e.g., `sales hit the roof' vs. `hit the
	roof of the car'. We present a novel method of classifying whether a given
	instance is literal or idiomatic, focusing on verb-noun constructions. We
	report state-of-the-art results on this task using an approach based on the
	hypothesis that the distributions of the contexts of the idiomatic phrases will
	be different from the contexts of the literal usages. We measure contexts by
	using projections of the words into vector space. For comparison, we implement
	Fazly et al. (2009)’s, Sporleder and Li (2009)’s, and Li and Sporleder
	(2010b)’s methods and apply them to our data. We provide experimental results
	validating the proposed techniques.},
  url       = {http://aclweb.org/anthology/C16-1259}
}

@InProceedings{laha-raykar:2016:COLING,
  author    = {Laha, Anirban  and  Raykar, Vikas},
  title     = {An Empirical Evaluation of various Deep Learning Architectures for Bi-Sequence Classification Tasks},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2762--2773},
  abstract  = {Several tasks in argumentation mining and debating, question-answering, and
	natural language inference involve classifying a sequence in the context of
	another sequence (referred to as bi-sequence classification). For several single
	sequence classification tasks, the current state-of-the-art approaches are
	based on recurrent and convolutional neural networks. On the other hand, for
	bi-sequence classification problems, there is not much understanding as to the
	best deep learning architecture. In this paper, we attempt to get an
	understanding of this category of problems by extensive empirical evaluation of
	19 different deep learning architectures (specifically on different ways of
	handling context) for various problems originating in natural language
	processing like debating, textual entailment and question-answering. Following
	the empirical evaluation, we offer our insights and conclusions regarding the
	architectures we have considered. We also establish the first deep learning
	baselines for three argumentation mining tasks.},
  url       = {http://aclweb.org/anthology/C16-1260}
}

@InProceedings{senuma-aizawa:2016:COLING,
  author    = {Senuma, Hajime  and  Aizawa, Akiko},
  title     = {Learning Succinct Models: Pipelined Compression with L1-Regularization, Hashing, Elias-Fano Indices, and Quantization},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2774--2784},
  abstract  = {The recent proliferation of smart devices necessitates
	  methods to learn small-sized models.
	This paper demonstrates that
	if there are $m$ features in total but
	only $n = o(\sqrt{m})$ features are required to distinguish examples,
	with $\Omega(\log m)$ training examples and reasonable settings,
	it is possible to obtain a good model in a \textit{succinct} representation
	using $n \log_2 \frac{m}{n} + o(m)$ bits,
	by using a pipeline of existing compression methods: L1-regularized logistic
	regression, feature hashing, Elias--Fano indices, and randomized quantization.
	An experiment shows that a noun phrase chunking task
	for which an existing library requires 27 megabytes can be compressed to less
	than 13 \underline{kilo}bytes without notable loss of accuracy.},
  url       = {http://aclweb.org/anthology/C16-1261}
}

@InProceedings{hellrich-hahn:2016:COLING,
  author    = {Hellrich, Johannes  and  Hahn, Udo},
  title     = {Bad Company—Neighborhoods in Neural Embedding Spaces Considered Harmful},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2785--2796},
  abstract  = {We assess the reliability and accuracy of (neural) word embeddings for both
	modern and historical English and German. Our research provides deeper insights
	into the empirically justified choice of optimal training methods and
	parameters. The overall low reliability we observe, nevertheless, casts doubt
	on the suitability of word neighborhoods in embedding spaces as a basis for
	qualitative conclusions on synchronic and diachronic lexico-semantic matters,
	an issue currently high up in the agenda of Digital Humanities.},
  url       = {http://aclweb.org/anthology/C16-1262}
}

@InProceedings{thorat-choudhari:2016:COLING,
  author    = {Thorat, Sushrut  and  Choudhari, Varad},
  title     = {Implementing a Reverse Dictionary, based on word definitions, using a Node-Graph Architecture},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2797--2806},
  abstract  = {In this paper, we outline an approach to build graph-based reverse dictionaries
	using word definitions. A reverse dictionary takes a phrase as an input and
	outputs a list of words semantically similar to that phrase. It is a solution
	to the Tip-of-the-Tongue problem. We use a distance-based similarity measure,
	computed on a graph, to assess the similarity between a word and the input
	phrase. We compare the performance of our approach with the Onelook Reverse
	Dictionary and a distributional semantics method based on word2vec, and show
	that our approach is much better than the distributional semantics method, and
	as good as Onelook, on a 3k lexicon. This simple approach sets a new
	performance baseline for reverse dictionaries.},
  url       = {http://aclweb.org/anthology/C16-1263}
}

@InProceedings{collell-moens:2016:COLING,
  author    = {Collell, Guillem  and  Moens, Marie-Francine},
  title     = {Is an Image Worth More than a Thousand Words? On the Fine-Grain Semantic Differences between Visual and Linguistic Representations},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2807--2817},
  abstract  = {Human concept representations are often grounded with visual information, yet
	some aspects of meaning cannot be visually represented or are better described
	with language. Thus, vision and language provide complementary information
	that, properly combined, can potentially yield more complete concept
	representations. Recently, state-of-the-art distributional semantic models and
	convolutional neural networks have achieved great success in representing
	linguistic and visual knowledge respectively. In this paper, we compare both,
	visual and linguistic representations in their ability to capture different
	types of fine-grain semantic knowledge---or attributes---of concepts. Humans
	often describe objects using attributes, that is, properties such as shape,
	color or functionality, which often transcend the linguistic and visual
	modalities. In our setting, we evaluate how well attributes can be predicted by
	using the unimodal representations as inputs. We are interested in, first,
	finding out whether attributes are generally better captured by the vision or
	by the language modality; and second, if neither of them is clearly
	superior (as we hypothesize), what types of attributes or semantic knowledge are
	better encoded from each modality. Ultimately, our study sheds light on the
	potential of combining visual and textual representations.},
  url       = {http://aclweb.org/anthology/C16-1264}
}

@InProceedings{mirza-tonelli:2016:COLING2,
  author    = {Mirza, Paramita  and  Tonelli, Sara},
  title     = {On the contribution of word embeddings to temporal relation classification},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2818--2828},
  abstract  = {Temporal relation classification is a challenging task, especially when there
	are no explicit markers to characterise the relation between temporal entities.
	This occurs frequently in inter-sentential relations, whose entities are not
	connected via direct syntactic relations, making classification even more
	difficult. In these cases, resorting to features that focus on the semantic
	content of the event words may be very beneficial for inferring implicit
	relations. Specifically, while morpho-syntactic and context features are
	considered sufficient for classifying event-timex pairs, we believe that
	exploiting distributional semantic information about event words can benefit
	supervised classification of other types of pairs. In this work, we assess the
	impact of using word embeddings as features for event words in classifying
	temporal relations of event-event pairs and event-DCT (document creation time)
	pairs.},
  url       = {http://aclweb.org/anthology/C16-1265}
}

@InProceedings{inoue-EtAl:2016:COLING,
  author    = {Inoue, Naoya  and  Matsubayashi, Yuichiroh  and  Ono, Masayuki  and  Okazaki, Naoaki  and  Inui, Kentaro},
  title     = {Modeling Context-sensitive Selectional Preference with Distributed Representations},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2829--2838},
  abstract  = {This paper proposes a novel problem setting of selectional preference (SP)
	between a predicate and its arguments, called context-sensitive SP (CSP).
	CSP models the narrative consistency between the predicate and preceding
	contexts of its arguments, in addition to the conventional SP based on semantic
	types. Furthermore, we present a novel CSP model that extends the neural SP
	model (Van de Cruys, 2014) to incorporate contextual information into the
	distributed representations of arguments. Experimental results demonstrate that
	the proposed CSP model successfully learns CSP and outperforms the conventional
	SP model in coreference cluster ranking.},
  url       = {http://aclweb.org/anthology/C16-1266}
}

@InProceedings{petersen-hellwig:2016:COLING,
  author    = {Petersen, Wiebke  and  Hellwig, Oliver},
  title     = {Exploring the value space of attributes: Unsupervised bidirectional clustering of adjectives in German},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2839--2848},
  abstract  = {The paper presents an iterative bidirectional clustering of adjectives and
	nouns based on a co-occurrence matrix. The clustering method combines a Vector
	Space Model (VSM) and a Latent Dirichlet Allocation (LDA),
	whose results are merged in each iterative step. The aim is to derive a
	clustering of German adjectives that reflects latent semantic classes of
	adjectives, and that can be used to induce frame-based representations of nouns
	in a later step.
	We are able to show that the method induces meaningful groups of adjectives,
	and that it outperforms a baseline k-means algorithm.},
  url       = {http://aclweb.org/anthology/C16-1267}
}

@InProceedings{kartsaklis-sadrzadeh:2016:COLING,
  author    = {Kartsaklis, Dimitri  and  Sadrzadeh, Mehrnoosh},
  title     = {Distributional Inclusion Hypothesis for Tensor-based Composition},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2849--2860},
  abstract  = {According to the distributional inclusion hypothesis, entailment between words
	can be measured via the feature inclusions of their distributional vectors. In
	recent work, we showed how this hypothesis can be extended from words to  
	phrases and sentences in the setting of compositional distributional semantics.
	This paper focuses on inclusion properties of tensors; its main contribution
	is a theoretical and experimental analysis of how feature inclusion works in
	different concrete models of verb tensors. We present results for relational,
	Frobenius,  projective, and holistic  methods and compare them to the simple
	vector addition, multiplication, min, and max models. The degrees of entailment
	thus obtained are evaluated via a variety of existing word-based measures, such
	as Weeds' and Clarke's, KL-divergence, APinc, balAPinc, and two of our
	previously proposed metrics at the phrase/sentence level. We perform
	experiments on three entailment datasets, investigating which version of
	tensor-based composition achieves the highest performance when combined with
	the sentence-level measures.},
  url       = {http://aclweb.org/anthology/C16-1268}
}
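
For readers unfamiliar with the word-level inclusion measures named in this abstract, the Python sketch below shows how Weeds precision and Clarke's degree of entailment are commonly computed over distributional feature vectors, next to the simple additive and min composition baselines; the dense numpy vectors and function names are illustrative assumptions, not code from the paper.

import numpy as np

def weeds_precision(u, v):
    # Proportion of u's feature mass that falls on features also active in v.
    shared = (u > 0) & (v > 0)
    return u[shared].sum() / u.sum()

def clarke_de(u, v):
    # Clarke's degree of entailment: how much of u is "included" in v.
    return np.minimum(u, v).sum() / u.sum()

def compose_add(a, b):
    return a + b             # simple vector addition baseline

def compose_min(a, b):
    return np.minimum(a, b)  # element-wise min baseline

u = np.array([1.0, 2.0, 0.0, 3.0])
v = np.array([0.5, 4.0, 1.0, 3.0])
print(weeds_precision(u, v), clarke_de(u, v))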

@InProceedings{maki-nishikawa-tokunaga:2016:COLING,
  author    = {Maki, Ryosuke  and  Nishikawa, Hitoshi  and  Tokunaga, Takenobu},
  title     = {Parameter estimation of Japanese predicate argument structure analysis model using eye gaze information},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2861--2869},
  abstract  = {In this paper, we propose utilising eye gaze information for
	  estimating parameters of a Japanese predicate argument structure
	  (PAS) analysis model.  We employ not only linguistic information in
	  the text, but also the information of annotator eye gaze during
	  their annotation process.  We hypothesise that an annotator's frequent
	  looks at certain candidates indicate their plausibility of being the
	  argument of the predicate. Based on this hypothesis, we consider
	  annotator eye gaze for estimating the model parameters of the PAS
	  analysis. The evaluation experiment showed that introducing eye gaze
	  information increased the accuracy of the PAS analysis by 0.05
	  compared with the conventional methods.},
  url       = {http://aclweb.org/anthology/C16-1269}
}

@InProceedings{sha-EtAl:2016:COLING,
  author    = {Sha, Lei  and  Chang, Baobao  and  Sui, Zhifang  and  Li, Sujian},
  title     = {Reading and Thinking: Re-read LSTM Unit for Textual Entailment Recognition},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2870--2879},
  abstract  = {Recognizing Textual Entailment (RTE) is a fundamentally important task in
	natural language processing that has many applications. The recently released
	Stanford Natural Language Inference (SNLI) corpus has made it possible to
	develop and evaluate deep neural network methods for the RTE task. Previous
	neural network based methods usually try to encode the two sentences (premise
	and hypothesis) and send them together into a multi-layer perceptron to get
	their entailment type, or use an LSTM-RNN to link the two sentences while
	using an attention mechanism to enhance the model’s ability. In this paper, we
	propose a re-read mechanism, which reads the premise again and
	again while reading the hypothesis. After re-reading the premise, the model
	gains a better understanding of the premise, which in turn affects the
	understanding of the hypothesis; conversely, a better understanding of the
	hypothesis also affects the understanding of the premise. With this
	alternating re-read process, the model can “think” its way to a better
	decision on the entailment type. We designed a new LSTM unit called re-read LSTM (rLSTM) to
	implement this “thinking” process. Experiments show that we achieve results
	better than current state-of-the-art equivalents.},
  url       = {http://aclweb.org/anthology/C16-1270}
}

@InProceedings{dey-shrivastava-kaushik:2016:COLING,
  author    = {Dey, Kuntal  and  Shrivastava, Ritvik  and  Kaushik, Saroj},
  title     = {A Paraphrase and Semantic Similarity Detection System for User Generated Short-Text Content on Microblogs},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2880--2890},
  abstract  = {Existing systems deliver high accuracy and F1-scores for detecting paraphrase
	and semantic similarity on traditional clean-text corpus. For instance, on the
	clean-text Microsoft Paraphrase benchmark database, the existing systems attain
	an accuracy as high as 0.8596. However, detecting
	paraphrases and semantic similarity in user-generated short-text content on
	microblogs such as Twitter, which comprises noisy and ad hoc short text, still needs
	significant research attention. In this paper, we propose a machine learning
	based approach towards this. We propose a set of features that, although
	well-known in the NLP literature for solving other problems, have not been
	explored for detecting paraphrase or semantic similarity on noisy
	user-generated short-text data such as Twitter. We apply support vector machine
	(SVM) based learning. We use the benchmark Twitter paraphrase data, released as
	a part of SemEval 2015, for experiments. Our system delivers a paraphrase
	detection F1-score of 0.717 and a semantic similarity detection F1-score of
	0.741, thereby significantly outperforming the existing systems, which deliver
	F1-scores of 0.696 and 0.724 for the two problems respectively. Our features
	also allow us to obtain a rank among the top 10 when trained on the Microsoft
	Paraphrase corpus and tested on the corresponding test data, thereby
	empirically establishing our approach as applicable across different
	paraphrase detection databases.},
  url       = {http://aclweb.org/anthology/C16-1271}
}

@InProceedings{levy-EtAl:2016:COLING,
  author    = {Levy, Omer  and  Dagan, Ido  and  Stanovsky, Gabriel  and  Eckle-Kohler, Judith  and  Gurevych, Iryna},
  title     = {Modeling Extractive Sentence Intersection via Subtree Entailment},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2891--2901},
  abstract  = {Sentence intersection captures the semantic overlap of two texts, generalizing
	over paradigms such as textual entailment and semantic text similarity. Despite
	its modeling power, it has received little attention because it is difficult
	for non-experts to annotate. We analyze 200 pairs of similar sentences and
	identify several underlying properties of sentence intersection. We leverage
	these insights to design an algorithm that decomposes the sentence intersection
	task into several simpler annotation tasks, facilitating the construction of a
	high quality dataset via crowdsourcing. We implement this approach and provide
	an annotated dataset of 1,764 sentence intersections.},
  url       = {http://aclweb.org/anthology/C16-1272}
}

@InProceedings{han-sun:2016:COLING,
  author    = {Han, Xianpei  and  Sun, Le},
  title     = {Context-Sensitive Inference Rule Discovery: A Graph-Based Method},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2902--2911},
  abstract  = {Inference rule discovery aims to identify entailment relations between
	predicates, e.g., ‘X acquire Y --> X purchase Y’ and ‘X is author of Y
	--> X write Y’. Traditional methods discover inference rules by computing
	distributional similarities between predicates, with each predicate
	represented as one or more feature vectors of its instantiations. These
	methods, however, have two main drawbacks. Firstly, these methods are mostly
	context-insensitive and cannot accurately measure the similarity between two
	predicates in a specific context. Secondly, traditional methods usually model
	predicates independently, ignoring the rich inter-dependencies between
	predicates. To address the above two issues, this paper proposes a graph-based
	method, which can discover inference rules by effectively modelling and
	exploiting both the context and the inter-dependencies between predicates.
	Specifically, we propose a graph-based representation, the Predicate Graph, which
	can capture the semantic relevance between predicates using both the
	predicate-feature co-occurrence statistics and the inter-dependencies between
	predicates. Based on the predicate graph, we propose a context-sensitive random
	walk algorithm, which can learn context-specific predicate representations by
	distinguishing context-relevant information from context-irrelevant
	information. Experimental results show that our method significantly
	outperforms traditional inference rule discovery methods.},
  url       = {http://aclweb.org/anthology/C16-1273}
}

@InProceedings{zhou-liu-pan:2016:COLING,
  author    = {Zhou, Yao  and  Liu, Cong  and  Pan, Yan},
  title     = {Modelling Sentence Pairs with Tree-structured Attentive Encoder},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2912--2922},
  abstract  = {We describe an attentive encoder that combines tree-structured recursive neural
	networks and sequential recurrent neural networks for modelling sentence pairs.
	Since existing attentive models exert attention on the sequential structure, we
	propose a way to incorporate attention into the tree topology. Specifically, given
	a pair of sentences, our attentive encoder uses the representation of one
	sentence, which is generated via an RNN, to guide the structural encoding of the
	other sentence on the dependency parse tree. We evaluate the proposed attentive
	encoder on three tasks: semantic similarity, paraphrase identification and
	true-false question selection. Experimental results show that our encoder
	outperforms all baselines and achieves state-of-the-art results on two tasks.},
  url       = {http://aclweb.org/anthology/C16-1274}
}

@InProceedings{prakash-EtAl:2016:COLING,
  author    = {prakash, aaditya  and  Hasan, Sadid A.  and  Lee, Kathy  and  Datla, Vivek  and  Qadir, Ashequl  and  Liu, Joey  and  Farri, Oladimeji},
  title     = {Neural Paraphrase Generation with Stacked Residual LSTM Networks},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2923--2934},
  abstract  = {In this paper, we propose a novel neural approach for paraphrase generation.
	Conventional paraphrase generation methods either leverage hand-written rules
	and thesauri-based alignments, or use statistical machine learning principles.
	To the best of our knowledge, this work is the first to explore deep learning
	models for paraphrase generation. Our primary contribution is a stacked
	residual LSTM network, where we add residual connections between LSTM layers.
	This allows for efficient training of deep LSTMs. We evaluate our model and
	other state-of-the-art deep learning models on three different datasets: PPDB,
	WikiAnswers, and MSCOCO. Evaluation results demonstrate that our model
	outperforms sequence to sequence, attention-based, and bi-directional LSTM
	models on BLEU, METEOR, TER, and an embedding-based sentence similarity metric.},
  url       = {http://aclweb.org/anthology/C16-1275}
}
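
The central idea above, residual connections between stacked LSTM layers, can be sketched in PyTorch roughly as follows; the layer count, sizes, and the choice to skip the residual on the first layer are my own assumptions rather than details taken from the paper.

import torch
import torch.nn as nn

class StackedResidualLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers=4):
        super().__init__()
        self.layers = nn.ModuleList(
            [nn.LSTM(input_size if i == 0 else hidden_size,
                     hidden_size, batch_first=True)
             for i in range(num_layers)])

    def forward(self, x):
        out = x
        for i, lstm in enumerate(self.layers):
            h, _ = lstm(out)
            # Residual connection between LSTM layers; the first layer is
            # skipped because its input and output widths may differ.
            out = h + out if i > 0 else h
        return out

encoder = StackedResidualLSTM(input_size=256, hidden_size=256)
states = encoder(torch.randn(8, 20, 256))  # (batch, time, hidden)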

@InProceedings{feng-EtAl:2016:COLING2,
  author    = {Feng, Xiaocheng  and  Tang, Duyu  and  Qin, Bing  and  Liu, Ting},
  title     = {English-Chinese Knowledge Base Translation with Neural Network},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2935--2944},
  abstract  = {Knowledge bases (KBs) such as Freebase play an important role in many natural
	language processing tasks.
	The English knowledge base is considerably larger and of higher quality than those of
	low-resource languages like Chinese.
	To expand Chinese KB by leveraging English KB resources, an effective way is to
	translate English KB (source) into Chinese (target).
	In this direction, two major challenges are to model triple semantics and to
	build a robust KB translator.
	We address these challenges by presenting a neural network approach, which
	learns continuous triple representation with a gated neural network. 
	Accordingly, source triples and target triples are mapped in the same semantic
	vector space.
	We build a new dataset for English-Chinese KB translation from Freebase, and
	compare with several baselines on it.
	Experimental results show that the proposed method improves translation
	accuracy compared with baseline methods. 
	We show that the adaptive composition model improves over standard solutions such as
	the neural tensor network in terms of translation accuracy.},
  url       = {http://aclweb.org/anthology/C16-1276}
}

@InProceedings{bougouin-boudin-daille:2016:COLING,
  author    = {Bougouin, Adrien  and  Boudin, Florian  and  Daille, Beatrice},
  title     = {Keyphrase Annotation with Graph Co-Ranking},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2945--2955},
  abstract  = {Keyphrase annotation is the task of identifying textual units that represent
	the main content of a document. Keyphrase annotation is either carried out by
	extracting the most important phrases from a document, keyphrase extraction, or
	by assigning entries from a controlled domain-specific vocabulary, keyphrase
	assignment. Assignment methods are generally more reliable. They provide
	better-formed keyphrases, as well as keyphrases that do not occur in the
	document. But they often remain silent (produce no keyphrase), unlike extraction methods, which
	do not depend on manually built resources. This paper proposes a new method to
	perform both keyphrase extraction and keyphrase assignment in an integrated and
	mutually reinforcing manner. Experiments have been carried out on datasets
	covering different domains of the humanities and social sciences. They show
	statistically significant improvements compared to both keyphrase extraction
	and keyphrase assignment state-of-the-art methods.},
  url       = {http://aclweb.org/anthology/C16-1277}
}

@InProceedings{jansen-EtAl:2016:COLING,
  author    = {Jansen, Peter  and  Balasubramanian, Niranjan  and  Surdeanu, Mihai  and  Clark, Peter},
  title     = {What's in an Explanation? Characterizing Knowledge and Inference Requirements for Elementary Science Exams},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2956--2965},
  abstract  = {QA systems have been making steady advances in the challenging elementary
	science exam domain. In this work, we develop an explanation-based analysis of
	knowledge and inference requirements, which supports a fine-grained
	characterization of the challenges. In particular, we model the requirements
	based on appropriate sources of evidence to be used for the QA task. We create
	requirements by first identifying suitable sentences in a knowledge base that
	support the correct answer, then use these to build explanations, filling in
	any necessary missing information. These explanations are used to create a
	fine-grained categorization of the requirements. Using these requirements, we
	compare a retrieval and an inference solver on 212 questions. The analysis
	validates the gains of the inference solver, demonstrating that it answers more
	questions requiring complex inference, while also providing insights into the
	relative strengths of the solvers and knowledge sources. We release the
	annotated questions and explanations as a resource with broad utility for
	science exam QA, including determining knowledge base construction targets, as
	well as supporting information aggregation in automated inference.},
  url       = {http://aclweb.org/anthology/C16-1278}
}

@InProceedings{johnson-goldwasser:2016:COLING,
  author    = {Johnson, Kristen  and  Goldwasser, Dan},
  title     = {“All I know about politics is what I read in Twitter”: Weakly Supervised Models for Extracting Politicians’ Stances From Twitter},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2966--2977},
  abstract  = {During the 2016 United States presidential election, politicians have
	increasingly used Twitter to express their beliefs, stances on current
	political issues, and reactions concerning national and international events.
	Given the limited length of tweets and the scrutiny politicians face for what
	they choose or neglect to say, they must craft and time their tweets carefully.
	The content and delivery of these tweets is therefore highly indicative of a
	politician's stances. We present a weakly supervised method for extracting how
	issues are framed and temporal activity patterns on Twitter for popular
	politicians and issues of the 2016 election. These behavioral components are
	combined into a global model which collectively infers the most likely stance
	and agreement patterns among politicians, with respective accuracies of 86.44\%
	and 84.6\% on average.},
  url       = {http://aclweb.org/anthology/C16-1279}
}

@InProceedings{yang-mukherjee-zhang:2016:COLING,
  author    = {Yang, Fan  and  Mukherjee, Arjun  and  Zhang, Yifan},
  title     = {Leveraging Multiple Domains for Sentiment Classification},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2978--2988},
  abstract  = {Sentiment classification becomes more and more important with the rapid growth
	of user generated content. However, the sentiment classification task usually comes
	with two challenges: first, sentiment classification is highly
	domain-dependent, and training a sentiment classifier for every domain is
	inefficient and often impractical; second, since the quantity of labeled data
	is important for assessing the quality of a classifier, it is hard to evaluate
	classifiers when labeled data is limited for certain domains. To address the
	challenges mentioned above, we focus on learning high-level features that are
	able to generalize across domains, so a global classifier can benefit from
	a simple combination of documents from multiple domains. In this paper, the
	proposed model incorporates both sentiment polarity and unlabeled data from
	multiple domains and learns new feature representations. Our model doesn't
	require labels from every domain, which means the learned feature
	representation can be generalized for sentiment domain adaptation. In addition,
	the learned feature representation can be used as a classifier, since our model
	defines the meaning of each feature value and arranges high-level features in a
	predefined order, so it is not necessary to train another classifier on top of
	the new features. Empirical evaluations demonstrate that our model outperforms
	baselines and yields results competitive with other state-of-the-art works on
	benchmark datasets.},
  url       = {http://aclweb.org/anthology/C16-1280}
}

@InProceedings{bakken-EtAl:2016:COLING,
  author    = {Bakken, Patrik F.  and  Bratlie, Terje A.  and  Marco, Cristina  and  Gulla, Jon Atle},
  title     = {Political News Sentiment Analysis for Under-resourced Languages},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2989--2996},
  abstract  = {This paper presents classification results for the analysis of sentiment in
	political news articles. The domain of political news is particularly
	challenging, as journalists are presumably objective, whilst at the same time
	opinions can be subtly expressed. To deal with this challenge, in this work we
	build a two-step classification model, first distinguishing subjective texts and
	then separating positive from negative sentiment texts. More specifically, we propose a
	shallow machine learning approach where only minimal features are needed to
	train the classifier, including sentiment-bearing Co-Occurring Terms (COTs)
	and negation words. This approach yields close to state-of-the-art results.
	Contrary to results in other domains, the use of negations as features does not
	have a positive impact on the evaluation results. This method is particularly
	suited for languages that suffer from a lack of resources, such as sentiment
	lexicons or parsers, and for those systems that need to function in real-time.},
  url       = {http://aclweb.org/anthology/C16-1281}
}
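
A rough sketch of the two-step setup described above (subjectivity first, then polarity), written with scikit-learn and plain TF-IDF features as a stand-in for the paper's Co-Occurring Term and negation features, which the abstract does not specify in enough detail to reproduce.

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import make_pipeline
from sklearn.svm import LinearSVC

# Step 1: subjective vs. objective; Step 2: positive vs. negative, on subjective texts only.
subjectivity_clf = make_pipeline(TfidfVectorizer(ngram_range=(1, 2)), LinearSVC())
polarity_clf = make_pipeline(TfidfVectorizer(ngram_range=(1, 2)), LinearSVC())

def train(texts, subj_labels, polarity_labels):
    subjectivity_clf.fit(texts, subj_labels)
    subj_texts = [t for t, s in zip(texts, subj_labels) if s == "subjective"]
    subj_pols = [p for p, s in zip(polarity_labels, subj_labels) if s == "subjective"]
    polarity_clf.fit(subj_texts, subj_pols)

def predict(text):
    if subjectivity_clf.predict([text])[0] != "subjective":
        return "neutral"
    return polarity_clf.predict([text])[0]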

@InProceedings{lund-EtAl:2016:COLING,
  author    = {Lund, Jeffrey  and  Felt, Paul  and  Seppi, Kevin  and  Ringger, Eric},
  title     = {Fast Inference for Interactive Models of Text},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2997--3006},
  abstract  = {Probabilistic models are a useful means for analyzing large text corpora.
	Integrating such models with human interaction enables many new use cases.
	However, adding human interaction to probabilistic models requires inference
	algorithms which are both fast and accurate. We explore the use of Iterated
	Conditional Modes as a fast alternative to Gibbs sampling or variational EM. We
	demonstrate superior performance both in run time and model quality on three
	different models of text including a DP Mixture of Multinomials for web search
	result clustering, the Interactive Topic Model, and MomResp, a multinomial
	crowdsourcing model.},
  url       = {http://aclweb.org/anthology/C16-1282}
}
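
The contrast drawn above between Gibbs sampling and Iterated Conditional Modes can be illustrated with a small mixture-of-multinomials sweep: where Gibbs draws the new cluster from the conditional distribution, ICM takes its argmax. The priors and the simplified point-estimate scoring below are assumptions for illustration, not the exact collapsed conditionals of the models evaluated in the paper.

import numpy as np

def icm_sweep(counts, assign, n_clusters, alpha=1.0, beta=0.01):
    """One Iterated Conditional Modes sweep over a mixture of multinomials.
    counts: (n_docs, vocab) term counts; assign: current cluster id per document.
    Gibbs sampling would sample from the conditional; ICM takes its argmax."""
    n_docs, vocab = counts.shape
    for d in range(n_docs):
        scores = np.empty(n_clusters)
        for k in range(n_clusters):
            members = (assign == k) & (np.arange(n_docs) != d)
            word_probs = counts[members].sum(axis=0) + beta
            word_probs = word_probs / word_probs.sum()
            scores[k] = np.log(members.sum() + alpha) + counts[d].dot(np.log(word_probs))
        assign[d] = int(np.argmax(scores))  # deterministic argmax, not a random draw
    return assign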

@InProceedings{tsakalidis-EtAl:2016:COLING,
  author    = {Tsakalidis, Adam  and  Liakata, Maria  and  Damoulas, Theo  and  Jellinek, Brigitte  and  Guo, Weisi  and  Cristea, Alexandra},
  title     = {Combining Heterogeneous User Generated Data to Sense Well-being},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3007--3018},
  abstract  = {In this paper we address a new problem of predicting affect and well-being
	scales in a real-world setting of heterogeneous, longitudinal and
	non-synchronous textual as well as non-linguistic data that can be harvested
	from on-line media and mobile phones. We describe the method for collecting the
	heterogeneous longitudinal data, how features are extracted to address missing
	information and differences in temporal alignment, and how the latter are
	combined to yield promising predictions of affect and well-being on the basis
	of widely used psychological scales. We achieve a coefficient of determination
	($R^2$) of 0.71-0.76 and a correlation coefficient of 0.68-0.87, which is higher
	than the state-of-the-art in equivalent multi-modal tasks for affect.},
  url       = {http://aclweb.org/anthology/C16-1283}
}

@InProceedings{li-EtAl:2016:COLING10,
  author    = {Li, Yang  and  Liu, Ting  and  Jiang, Jing  and  Zhang, Liang},
  title     = {Hashtag Recommendation with Topical Attention-Based LSTM},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3019--3029},
  abstract  = {Microblogging services allow users to create hashtags to categorize their
	posts.
	In recent years, the task of recommending hashtags for microblogs has been
	given increasing attention. 
	However, most existing methods depend on hand-crafted features.
	Motivated by the successful use of long short-term memory (LSTM) for many
	natural language processing tasks, in this paper, we adopt LSTM to learn the
	representation of a microblog post.
	Observing that hashtags indicate the primary topics of microblog posts, we
	propose a novel attention-based LSTM model which incorporates topic modeling
	into the LSTM architecture through an attention mechanism. 
	We evaluate our model using a large real-world dataset. Experimental results
	show that our model significantly outperforms various competitive baseline
	methods. Furthermore, the incorporation of the topical attention mechanism gives
	more than 7.4\% improvement in F1 score compared with the standard LSTM method.},
  url       = {http://aclweb.org/anthology/C16-1284}
}
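
One plausible reading of the topical attention mechanism described above is an attention layer whose scores depend both on the LSTM hidden states and on a topic distribution for the post; the scoring function and dimensions in this PyTorch sketch are guesses for illustration, not the architecture reported in the paper.

import torch
import torch.nn as nn

class TopicalAttention(nn.Module):
    def __init__(self, hidden_size, n_topics, attn_size=128):
        super().__init__()
        self.proj_h = nn.Linear(hidden_size, attn_size, bias=False)
        self.proj_z = nn.Linear(n_topics, attn_size, bias=False)
        self.score = nn.Linear(attn_size, 1, bias=False)

    def forward(self, states, topic_dist):
        # states: (batch, time, hidden) LSTM outputs for one post
        # topic_dist: (batch, n_topics), e.g. an LDA topic distribution for the post
        energy = torch.tanh(self.proj_h(states) + self.proj_z(topic_dist).unsqueeze(1))
        weights = torch.softmax(self.score(energy).squeeze(-1), dim=1)  # (batch, time)
        return (weights.unsqueeze(-1) * states).sum(dim=1)              # pooled post vector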

@InProceedings{kordjamshidi-EtAl:2016:COLING,
  author    = {Kordjamshidi, Parisa  and  Khashabi, Daniel  and  Christodoulopoulos, Christos  and  Mangipudi, Bhargav  and  Singh, Sameer  and  Roth, Dan},
  title     = {Better call Saul: Flexible Programming for Learning and Inference in NLP},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3030--3040},
  abstract  = {We present a novel way for designing complex joint inference and learning
	models using Saul~\cite{KordjamshidiRoWu15}, a recently-introduced declarative
	learning-based programming language (DeLBP). We enrich Saul with components
	that are necessary for a broad range of learning based Natural Language
	Processing tasks at various levels of granularity. We illustrate these advances
	using three different, well-known NLP problems, and show how these generic
	learning and inference modules can directly exploit Saul's graph-based data
	representation. These properties allow the programmer to easily switch between
	different model formulations and configurations, and consider various kinds of
	dependencies and correlations among variables of interest with minimal
	programming effort. We argue that Saul provides an extremely useful paradigm
	both for the design of advanced NLP systems and for supporting advanced
	research in NLP.},
  url       = {http://aclweb.org/anthology/C16-1285}
}

@InProceedings{guillaume-fort-lefebvre:2016:COLING,
  author    = {Guillaume, Bruno  and  Fort, Kar\"{e}n  and  Lefebvre, Nicolas},
  title     = {Crowdsourcing Complex Language Resources: Playing to Annotate Dependency Syntax},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3041--3052},
  abstract  = {This article presents the results we obtained on a complex annotation task
	(that of dependency syntax) using a specifically designed Game with a Purpose,
	ZombiLingo.
	We show that with suitable mechanisms (decomposition of the task, training of
	the players and regular control of the annotation quality during the game), it
	is possible to obtain annotations whose quality is significantly higher than
	that obtainable with a parser, provided that enough players participate. The
	source code of the game and the resulting annotated corpora (for French) are
	freely available.},
  url       = {http://aclweb.org/anthology/C16-1286}
}

@InProceedings{singhal-bhattacharyya:2016:COLING,
  author    = {Singhal, Prerana  and  Bhattacharyya, Pushpak},
  title     = {Borrow a Little from your Rich Cousin: Using Embeddings and Polarities of English Words for Multilingual Sentiment Classification},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3053--3062},
  abstract  = {In this paper, we provide a solution to multilingual sentiment classification
	using deep learning. Given input text in a language, we  use word translation
	into English and then the embeddings of these English words to train a
	classifier. This projection into the English space plus word embeddings gives a
	simple and uniform framework for multilingual sentiment analysis. A novel idea
	is augmentation of the training data with polar words, appearing in these
	sentences, along with their polarities. This approach leads to a performance
	gain of 7-10\% over traditional classifiers on many languages, irrespective of
	text genre, despite the scarcity of resources in most languages.},
  url       = {http://aclweb.org/anthology/C16-1287}
}

@InProceedings{yang-EtAl:2016:COLING,
  author    = {Yang, Zhen  and  Chen, Wei  and  Wang, Feng  and  Xu, Bo},
  title     = {A Character-Aware Encoder for Neural Machine Translation},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3063--3070},
  abstract  = {This article proposes a novel character-aware neural machine translation (NMT)
	model that views
	the input sequences as sequences of characters rather than words. Using
	row convolution (Amodei et al., 2015), the encoder of the proposed model automatically
	composes word-level information from the input character sequences. Since our
	model does not rely on the boundaries between words (such as the whitespace
	boundaries in English), it can also be applied to languages without explicit
	word segmentation (like Chinese). Experimental results on Chinese-English
	translation tasks show that the proposed character-aware NMT model can achieve
	translation performance comparable with traditional word-based NMT models.
	Although the target side is still word based, the proposed model generates far
	fewer unknown words.},
  url       = {http://aclweb.org/anthology/C16-1288}
}

@InProceedings{su-EtAl:2016:COLING,
  author    = {su, jinsong  and  Zhang, Biao  and  Xiong, Deyi  and  Li, Ruochen  and  Yin, Jianmin},
  title     = {Convolution-Enhanced Bilingual Recursive Neural Network for Bilingual Semantic Modeling},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3071--3081},
  abstract  = {Estimating similarities at different levels of linguistic units, such as words,
	sub-phrases and
	phrases, is helpful for measuring semantic similarity of an entire bilingual
	phrase. In this paper,
	we propose a convolution-enhanced bilingual recursive neural network
	(ConvBRNN), which not
	only exploits word alignments to guide the generation of phrase structures but
	also integrates
	multiple-level information of the generated phrase structures into bilingual
	semantic modeling.
	In order to accurately learn the semantic hierarchy of a bilingual phrase, we
	develop a recursive
	neural network to constrain the learned bilingual phrase structures to be
	consistent with word
	alignments. Upon the generated source and target phrase structures, we stack a
	convolutional
	neural network to integrate vector representations of linguistic units on the
	structures into bilingual
	phrase embeddings. After that, we fully incorporate information of different
	linguistic units
	into a bilinear semantic similarity model. We introduce two max-margin losses
	to train the ConvBRNN
	model: one for the phrase structure inference and the other for the semantic
	similarity
	model. Experiments on NIST Chinese-English translation tasks demonstrate the
	high quality of
	the generated bilingual phrase structures with respect to word alignments and
	the effectiveness
	of learned semantic similarities on machine translation.},
  url       = {http://aclweb.org/anthology/C16-1289}
}

@InProceedings{feng-EtAl:2016:COLING3,
  author    = {Feng, Shi  and  Liu, Shujie  and  Yang, Nan  and  Li, Mu  and  Zhou, Ming  and  Zhu, Kenny Q.},
  title     = {Improving Attention Modeling with Implicit Distortion and Fertility for Machine Translation},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3082--3092},
  abstract  = {In neural machine translation, the attention mechanism facilitates the
	translation process by producing a soft alignment between the source sentence
	and the target sentence.  However, without dedicated distortion and fertility
	models seen in traditional SMT systems, the learned alignment may not be
	accurate, which can lead to low translation quality. In this paper, we propose
	two novel models to improve attention-based neural machine translation.  We
	propose a recurrent attention mechanism as an implicit distortion model, and a
	fertility conditioned decoder as an implicit fertility model. We conduct
	experiments on large-scale Chinese--English translation tasks. The results
	show
	that our models significantly improve both the alignment and translation
	quality
	compared to the original attention mechanism and several other variations.},
  url       = {http://aclweb.org/anthology/C16-1290}
}

@InProceedings{liu-EtAl:2016:COLING,
  author    = {Liu, Lemao  and  Utiyama, Masao  and  Finch, Andrew  and  Sumita, Eiichiro},
  title     = {Neural Machine Translation with Supervised Attention},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3093--3102},
  abstract  = {The attention mechanism is appealing for neural machine translation,
	since it is able to dynamically encode a source sentence by generating an
	alignment between a target word and source words. Unfortunately, it has been
	shown to be worse than conventional alignment models in alignment accuracy. In
	this paper, we analyze and explain this issue from the point of view of
	reordering, and propose a supervised attention mechanism which is learned with guidance
	from conventional alignment models. Experiments on two Chinese-to-English
	translation tasks show that the supervised attention mechanism yields better
	alignments, leading to substantial gains over standard attention-based NMT.},
  url       = {http://aclweb.org/anthology/C16-1291}
}
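
At its simplest, supervising attention with a conventional aligner can be expressed as an extra loss term that pulls the decoder's attention weights toward reference alignments; the cross-entropy form and the interpolation weight below are assumptions on my part, not the paper's exact objective.

import torch

def attention_supervision_loss(attn, ref_align, eps=1e-8):
    # attn:      (batch, tgt_len, src_len) attention weights from the NMT decoder
    # ref_align: same shape, row-normalised reference alignments (e.g. from a
    #            conventional statistical aligner)
    return -(ref_align * torch.log(attn + eps)).sum(dim=-1).mean()

# Typical use: total_loss = nll_loss + lam * attention_supervision_loss(attn, ref_align)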

@InProceedings{sperber-EtAl:2016:COLING,
  author    = {Sperber, Matthias  and  Neubig, Graham  and  Niehues, Jan  and  St\"{u}ker, Sebastian  and  Waibel, Alex},
  title     = {Lightly Supervised Quality Estimation},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3103--3113},
  abstract  = {Evaluating the quality of output from language processing systems such as
	machine translation or speech recognition is an essential step in ensuring that
	they are sufficient for practical use. However, depending on the practical
	requirements, evaluation approaches can differ strongly. Often, reference-based
	evaluation measures (such as BLEU or WER) are appealing because they are cheap
	and allow rapid quantitative comparison. On the other hand, practitioners often
	focus on manual evaluation because they must deal with frequently changing
	domains and quality standards requested by customers, for which reference-based
	evaluation is insufficient or not possible due to missing in-domain reference
	data (Harris et al., 2016). In this paper, we attempt to bridge this gap by
	proposing a framework for lightly supervised quality estimation. We collect
	manually annotated scores for a small number of segments in a test corpus or
	document, and combine them with automatically predicted quality scores for the
	remaining segments to predict an overall quality estimate. An evaluation shows
	that our framework estimates  quality more reliably than using fully automatic
	quality estimation approaches, while keeping annotation effort low by not
	requiring full references to be available for the particular domain.},
  url       = {http://aclweb.org/anthology/C16-1292}
}

@InProceedings{tang-EtAl:2016:COLING2,
  author    = {Tang, Haiqing  and  Xiong, Deyi  and  Lopez de Lacalle, Oier  and  Agirre, Eneko},
  title     = {Improving Translation Selection with Supersenses},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3114--3123},
  abstract  = {Selecting appropriate translations for source words with multiple meanings
	still remains a challenge for statistical machine translation (SMT). One reason
	for this is that most SMT systems are not good at detecting the proper sense
	for a polysemic word when it appears in different contexts. In this paper, we
	adopt a supersense tagging method to annotate source words with coarse-grained
	ontological concepts. In order to enable the system to choose an appropriate
	translation for a word or phrase according to the annotated supersense of the
	word or phrase, we propose two translation models with supersense knowledge: a
	maximum entropy based model and a supersense embedding model. The effectiveness
	of our proposed models is validated on a large-scale English-to-Spanish
	translation task. Results indicate that our method can significantly improve
	translation quality via correctly conveying the meaning of the source language
	to the target language.},
  url       = {http://aclweb.org/anthology/C16-1293}
}

@InProceedings{graham-EtAl:2016:COLING,
  author    = {Graham, Yvette  and  Baldwin, Timothy  and  Dowling, Meghan  and  Eskevich, Maria  and  Lynn, Teresa  and  Tounsi, Lamia},
  title     = {Is all that Glitters in Machine Translation Quality Estimation really Gold?},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3124--3134},
  abstract  = {Human-targeted metrics provide a compromise between human evaluation of machine
	translation, where high inter-annotator agreement is difficult to achieve, and
	fully automatic metrics, such as BLEU or TER, that lack the validity of human
	assessment. Human-targeted translation edit rate (HTER) is by far the most
	widely employed human-targeted metric in machine translation, commonly
	employed, for example, as a gold standard in evaluation of quality estimation.
	Original experiments justifying the design of HTER, as opposed to other
	possible formulations, were limited to a small sample of translations and a
	single language pair, however, and this motivates our re-evaluation of a range
	of human-targeted metrics on a substantially larger scale. Results show
	significantly stronger correlation with human judgment for HBLEU over HTER for
	two of the nine language pairs we include and no significant difference between
	correlations
	achieved by HTER and HBLEU for the remaining language pairs. Finally, we
	evaluate a range of quality estimation systems employing HTER and direct
	assessment (DA) of translation adequacy as gold labels, resulting in a
	divergence in system rankings, and propose employment of DA for future quality
	estimation evaluations.},
  url       = {http://aclweb.org/anthology/C16-1294}
}

@InProceedings{wang-EtAl:2016:COLING5,
  author    = {Wang, Rui  and  Zhao, Hai  and  Lu, Bao-Liang  and  Utiyama, Masao  and  Sumita, Eiichiro},
  title     = {Connecting Phrase based Statistical Machine Translation Adaptation},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3135--3145},
  abstract  = {Although more additional corpora are now available for Statistical Machine
	Translation (SMT), only those which belong to the same or similar domains as
	the original corpus can directly enhance SMT performance. A series of
	SMT adaptation methods have been proposed to select such similar-domain data,
	and most of them focus on sentence selection.  In comparison, the phrase is a
	smaller and more fine-grained unit for data selection; therefore we propose a
	straightforward and efficient connecting phrase based adaptation method, which
	is applied to both bilingual phrase pair and monolingual n-gram adaptation. The
	proposed method is evaluated on IWSLT/NIST data sets, and the results show that
	phrase-based SMT performance is significantly improved (up to +1.6 in
	comparison with the phrase-based SMT baseline system and +0.9 in comparison with
	existing methods).},
  url       = {http://aclweb.org/anthology/C16-1295}
}

@InProceedings{schulz-aziz:2016:COLING,
  author    = {Schulz, Philip  and  Aziz, Wilker},
  title     = {Fast Collocation-Based Bayesian HMM Word Alignment},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3146--3155},
  abstract  = {We present a new Bayesian HMM word alignment model for statistical machine
	translation. 
	The model is a mixture of an alignment model and a language model. 
	The alignment component is a Bayesian extension of the standard HMM. 
	The language model component is responsible for the generation of words needed
	for source fluency reasons from source language context. 
	This allows untranslatable source words to remain unaligned and at the same
	time avoids the introduction of artificial NULL words, which introduce
	unusually long alignment jumps. 
	Existing Bayesian word alignment models are impractically slow because they
	consider each target position when resampling a given alignment link. 
	The sampling complexity therefore grows linearly in the target sentence length.
	In order to make our model useful in practice, we devise an auxiliary variable
	Gibbs sampler that allows us to resample alignment links in constant time
	independently of the target sentence length. 
	This leads to considerable speed improvements. 
	Experimental results show that our model performs as well as existing word
	alignment toolkits in terms of resulting BLEU score.},
  url       = {http://aclweb.org/anthology/C16-1296}
}

@InProceedings{jehl-riezler:2016:COLING,
  author    = {Jehl, Laura  and  Riezler, Stefan},
  title     = {Learning to translate from graded and negative relevance information},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3156--3166},
  abstract  = {We present an approach for learning to translate by exploiting cross-lingual
	link structure in multilingual document collections. We propose a new learning
	objective based on structured ramp loss, which learns from graded relevance,
	explicitly including negative relevance information. Our results on
	English-German translation of Wikipedia entries show small, but significant,
	improvements of our method over an unadapted baseline, even when only a weak
	relevance signal is used. We also compare our method to monolingual language
	model adaptation and automatic pseudo-parallel data extraction and find small
	improvements even over these strong baselines.},
  url       = {http://aclweb.org/anthology/C16-1297}
}

@InProceedings{daiber-stanojevic-simaan:2016:COLING,
  author    = {Daiber, Joachim  and  Stanojevi\'{c}, Milo\v{s}  and  Sima'an, Khalil},
  title     = {Universal Reordering via Linguistic Typology},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3167--3176},
  abstract  = {In this paper we explore the novel idea of building a single universal
	reordering model from English to a large number of target languages. To build
	this model we exploit typological features of word order for a large number of
	target languages together with source (English) syntactic features and we train
	this model on a single combined parallel corpus representing all (22) involved
	language pairs. We contribute experimental evidence for the usefulness of
	linguistically defined typological features for building such a model. When the
	universal reordering model is used for preordering followed by monotone
	translation (no reordering inside the decoder), our experiments show that this
	pipeline gives comparable or improved translation performance with a
	phrase-based baseline for a large number of language pairs (12 out of 22) from
	diverse language families.},
  url       = {http://aclweb.org/anthology/C16-1298}
}

@InProceedings{durrani-EtAl:2016:COLING,
  author    = {Durrani, Nadir  and  Sajjad, Hassan  and  Joty, Shafiq  and  Abdelali, Ahmed},
  title     = {A Deep Fusion Model for Domain Adaptation in Phrase-based MT},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3177--3187},
  abstract  = {We present a novel fusion model for domain adaptation in Statistical Machine
	Translation. Our model is based on the joint source-target 
	neural network (Devlin et al., 2014), and is learned by fusing in- and out-domain
	models. The adaptation is performed by backpropagating errors from the output
	layer to the word embedding layer of each model, subsequently adjusting
	parameters of the composite model towards the in-domain data. On the standard
	tasks of translating English-to-German and Arabic-to-English TED talks, we
	observed average improvements of +0.9 and +0.7 BLEU points, respectively over a
	competition grade phrase-based system. We also demonstrate improvements over
	existing adaptation methods.},
  url       = {http://aclweb.org/anthology/C16-1299}
}

@InProceedings{zhang-EtAl:2016:COLING6,
  author    = {Zhang, Meng  and  Liu, Yang  and  Luan, Huanbo  and  Liu, Yiqun  and  Sun, Maosong},
  title     = {Inducing Bilingual Lexica From Non-Parallel Data With Earth Mover's Distance Regularization},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3188--3198},
  abstract  = {Being able to induce word translations from non-parallel data is often a
	prerequisite for cross-lingual processing in resource-scarce languages and
	domains. Previous endeavors typically simplify this task by imposing the
	one-to-one translation assumption, which is too strong to hold for natural
	languages. We remove this constraint by introducing the Earth Mover's Distance
	into the training of bilingual word embeddings. In this way, we take advantage
	of its capability to handle multiple alternative word translations in a natural
	form of regularization. Our approach shows significant and consistent
	improvements across four language pairs. We also demonstrate that our approach
	is particularly preferable in resource-scarce settings as it only requires a
	minimal seed lexicon.},
  url       = {http://aclweb.org/anthology/C16-1300}
}
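
To make the Earth Mover's Distance regularisation concrete, here is a small entropy-regularised (Sinkhorn) approximation in numpy; how EMD actually enters the embedding training objective in the paper may differ, so this is only an illustrative stand-in.

import numpy as np

def sinkhorn_distance(cost, a, b, reg=0.05, n_iter=200):
    """Entropy-regularised transport distance between word distributions a and b.
    cost[i, j] could be 1 minus the cosine similarity of bilingual word embeddings."""
    K = np.exp(-cost / reg)
    u = np.ones_like(a)
    for _ in range(n_iter):
        v = b / K.T.dot(u)
        u = a / K.dot(v)
    plan = u[:, None] * K * v[None, :]   # soft many-to-many translation plan
    return float((plan * cost).sum())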

@InProceedings{hirschmann-nam-furnkranz:2016:COLING,
  author    = {Hirschmann, Fabian  and  Nam, Jinseok  and  F\"{u}rnkranz, Johannes},
  title     = {What Makes Word-level Neural Machine Translation Hard: A Case Study on English-German Translation},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3199--3208},
  abstract  = {Traditional machine translation systems often require heavy feature engineering
	and the combination of multiple techniques for solving different subproblems.
	In recent years, several end-to-end learning architectures based on recurrent
	neural networks have been proposed. Unlike traditional systems, Neural Machine
	Translation (NMT) systems learn the parameters of the model and require only
	minimal preprocessing. Memory and time constraints allow only a fixed
	number of words to be taken into account, which leads to the out-of-vocabulary (OOV)
	problem. In this work, we analyze why the OOV problem arises and why it is
	considered a serious problem in German. We study the effectiveness of compound
	word splitters for alleviating the OOV problem, resulting in a 2.5+ BLEU points
	improvement over a baseline on the WMT'14 German-to-English translation task.
	For English-to-German translation, we use target-side compound splitting
	through a special syntax during training that allows the model to merge
	compound words and gain 0.2 BLEU points.},
  url       = {http://aclweb.org/anthology/C16-1301}
}

@InProceedings{jalilisabet-faili-haffari:2016:COLING,
  author    = {Jalili Sabet, Masoud  and  Faili, Heshaam  and  Haffari, Gholamreza},
  title     = {Improving Word Alignment of Rare Words with Word Embeddings},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3209--3215},
  abstract  = {We address the problem of inducing word alignment for language pairs by
	developing an unsupervised model with the capability of being applied to
	other generative alignment models.  We approach the task by: i) proposing a new
	alignment model based on IBM alignment model 1 that uses vector
	representations of words, and ii) examining the use of similar source words
	to overcome the problem of rare source words and improve the alignments. We
	apply our method to English-French corpora and run the experiments with
	different sizes of sentence pairs. Our results show competitive performance
	against the baseline and in some cases improve the results by up to 6.9\% in terms
	of precision.},
  url       = {http://aclweb.org/anthology/C16-1302}
}
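
For orientation, here is a compact IBM Model 1 EM loop in plain Python, with a comment marking where the abstract's idea of borrowing from similar (embedding-neighbour) source words could plug in for rare words; that hook is left abstract because the exact formulation is not given here.

from collections import defaultdict

def ibm1_em(bitext, n_iter=5):
    """bitext: list of (source_tokens, target_tokens) pairs.
    Returns t[(f, e)], the probability of source word f translating to target word e."""
    t = defaultdict(lambda: 1e-3)                # flat initialisation
    for _ in range(n_iter):
        count = defaultdict(float)
        total = defaultdict(float)
        for src, tgt in bitext:                  # E-step: expected alignment counts
            for e in tgt:
                norm = sum(t[(f, e)] for f in src)
                for f in src:
                    c = t[(f, e)] / norm
                    count[(f, e)] += c
                    total[f] += c
        for (f, e), c in count.items():          # M-step
            t[(f, e)] = c / total[f]
        # Hook for the rare-word idea: smooth t[(f, .)] for low-frequency f
        # towards the distributions of its nearest neighbours under a
        # word-embedding similarity before the next E-step.
    return t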

@InProceedings{chang-EtAl:2016:COLING,
  author    = {Chang, Ching-Yun  and  Zhang, Yue  and  Teng, Zhiyang  and  Bozanic, Zahn  and  Ke, Bin},
  title     = {Measuring the Information Content of Financial News},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3216--3225},
  abstract  = {Measuring the information content of news text is useful for decision makers in
	their investments since news information can influence the intrinsic values of
	companies. We propose a model to automatically measure the information content
	given news text, trained using news and corresponding cumulative abnormal
	returns of listed companies. Existing methods in finance literature exploit
	sentiment signal features, which are limited by not considering factors such as
	events. We address this issue by leveraging deep neural models to extract rich
	semantic features from news text. In particular, a novel tree-structured LSTM
	is used to find target-specific representations of news text given syntax
	structures. Empirical results show that the neural models can outperform
	sentiment-based models, demonstrating the effectiveness of recent NLP
	technology advances for computational finance.},
  url       = {http://aclweb.org/anthology/C16-1303}
}

@InProceedings{godea-bulgarov-nielsen:2016:COLING,
  author    = {Godea, Andreea  and  Bulgarov, Florin  and  Nielsen, Rodney},
  title     = {Automatic Generation and Classification of Minimal Meaningful Propositions in Educational Systems},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3226--3236},
  abstract  = {Truly effective and practical educational systems will only be achievable when
	they have the ability to fully recognize deep relationships between a
	learner’s interpretation of a subject and the desired conceptual
	understanding. In this paper, we take important steps in this direction by
	introducing a new representation of sentences -- Minimal Meaningful
	Propositions (MMPs), which will allow us to significantly improve the mapping
	between a learner’s answer and the ideal response. Using this technique, we
	make significant progress towards highly scalable and domain-independent
	educational systems that will be able to operate without human intervention.
	Even though this is a new task, we show very good results both for the
	extraction of MMPs and for classification with respect to their importance.},
  url       = {http://aclweb.org/anthology/C16-1304}
}

@InProceedings{panagiotou-EtAl:2016:COLING,
  author    = {Panagiotou, Nikolaos  and  Akkaya, Cem  and  Tsioutsiouliklis, Kostas  and  Kalogeraki, Vana  and  Gunopulos, Dimitrios},
  title     = {First Story Detection using Entities and Relations},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3237--3244},
  abstract  = {News portals, such as Yahoo News or Google News, collect large amounts of
	documents from a variety of sources on a daily basis. Only a small portion of
	these documents can be selected and displayed on the homepage. Thus, there is a
	strong preference for major, recent events. In this work, we propose a scalable
	and accurate First Story Detection (FSD) pipeline that identifies fresh news.
	In comparison to other FSD systems, our method relies on relation extraction
	methods exploiting entities and their relations. We evaluate our pipeline using
	two distinct datasets from Yahoo News and Google News. Experimental results 
	demonstrate that our method improves over the state-of-the-art systems on both
	datasets with constant space and time requirements.},
  url       = {http://aclweb.org/anthology/C16-1305}
}

@InProceedings{loukina-EtAl:2016:COLING,
  author    = {Loukina, Anastassia  and  Yoon, Su-Youn  and  Sakano, Jennifer  and  Wei, Youhua  and  Sheehan, Kathy},
  title     = {Textual complexity as a predictor of difficulty of listening items in language proficiency tests},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3245--3253},
  abstract  = {In this paper we explore to what extent the difficulty of listening items in an
	English language proficiency test can be predicted by the textual properties of
	the prompt. We show that a system based on multiple text complexity features
	can predict item difficulty for several different item types and for some items
	achieves higher accuracy than human estimates of item difficulty.},
  url       = {http://aclweb.org/anthology/C16-1306}
}

@InProceedings{hu-chen-chen:2016:COLING,
  author    = {HU, Renfen  and  Chen, Jiayong  and  Chen, Kuang-hua},
  title     = {The Construction of a Chinese Collocational Knowledge Resource and Its Application for Second Language Acquisition},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3254--3263},
  abstract  = {The appropriate use of collocations is a challenge for second language
	acquisition. However, high-quality and easily accessible Chinese collocation
	resources are not available to either teachers or students. This paper presents
	the design and construction of a large scale resource of Chinese collocational
	knowledge, and a web-based application (OCCA, Online Chinese Collocation
	Assistant) which offers free and convenient collocation search service to end
	users. We define and classify collocations based on practical language
	acquisition needs and utilize a syntax based method to extract nine types of
	collocations. In total, 37 extraction rules are compiled with word, POS and
	dependency relation features; 1,750,000 collocations are extracted from a
	corpus for L2 learning and complementary Wikipedia data, and OCCA is
	implemented based on these extracted collocations. By comparing OCCA with two
	traditional collocation dictionaries, we find OCCA has higher entry coverage
	and collocation quantity, and our method achieves a quite low error rate of less
	than 5%. We also discuss how to apply collocational knowledge to grammatical
	error detection and demonstrate comparable performance to the best results in
	2015 NLP-TEA CGED shared task. The preliminary experiment shows that the
	collocation knowledge is helpful in detecting all four types of grammatical
	errors.},
  url       = {http://aclweb.org/anthology/C16-1307}
}

@InProceedings{lu-EtAl:2016:COLING,
  author    = {Lu, Jing  and  Venugopal, Deepak  and  Gogate, Vibhav  and  Ng, Vincent},
  title     = {Joint Inference for Event Coreference Resolution},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3264--3275},
  abstract  = {Event coreference resolution is a challenging problem since it relies on
	several components of the information extraction pipeline that typically yield
	noisy outputs. We hypothesize that exploiting the inter-dependencies between
	these components can significantly improve the performance of an event
	coreference resolver, and subsequently propose a novel joint inference based
	event coreference resolver using Markov Logic Networks (MLNs). However, the
	rich features that are important for this task are typically very hard to
	explicitly encode as MLN formulas since they significantly increase the size of
	the MLN, thereby making joint inference and learning infeasible. To address
	this problem, we propose a novel solution where we implicitly encode rich
	features into our model by augmenting the MLN distribution with low dimensional
	unit clauses. Our approach achieves state-of-the-art results on two standard
	evaluation corpora.},
  url       = {http://aclweb.org/anthology/C16-1308}
}

@InProceedings{ge-EtAl:2016:COLING,
  author    = {Ge, Tao  and  Cui, Lei  and  Chang, Baobao  and  Sui, Zhifang  and  Zhou, Ming},
  title     = {Event Detection with Burst Information Networks},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3276--3286},
  abstract  = {Retrospective event detection is an important task for discovering previously
	unidentified events in a text stream. In this paper, we propose two fast
	centroid-aware event detection models based on a novel text stream
	representation -- Burst Information Networks (BINets) for addressing the
	challenge. The BINets are time-aware, efficient and can be easily analyzed for
	identifying key information (centroids). These advantages allow the BINet-based
	approaches to achieve the state-of-the-art performance on multiple datasets,
	demonstrating the efficacy of BINets for the task of event detection.},
  url       = {http://aclweb.org/anthology/C16-1309}
}

@InProceedings{zhu-EtAl:2016:COLING,
  author    = {Zhu, Suyang  and  Li, Shoushan  and  Chen, Ying  and  Zhou, Guodong},
  title     = {Corpus Fusion for Emotion Classification},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3287--3297},
  abstract  = {Machine learning-based methods have made great progress on emotion
	classification. However, in most previous studies, the models are learned based
	on a single corpus which often suffers from insufficient labeled data. In this
	paper, we propose a corpus fusion approach to address emotion classification
	across two corpora which use different emotion taxonomies. The objective of
	this approach is to utilize the annotated data from one corpus to help the
	emotion classification on another corpus. An Integer Linear Programming (ILP)
	optimization is proposed to refine the classification results. Empirical
	studies show the effectiveness of the proposed approach to corpus fusion for
	emotion classification.},
  url       = {http://aclweb.org/anthology/C16-1310}
}

@InProceedings{tang-EtAl:2016:COLING3,
  author    = {Tang, Duyu  and  Qin, Bing  and  Feng, Xiaocheng  and  Liu, Ting},
  title     = {Effective LSTMs for Target-Dependent Sentiment Classification},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3298--3307},
  abstract  = {Target-dependent sentiment classification remains a challenge: modeling the
	semantic relatedness of a target with its context words in a sentence.
	Different context words have different influences on determining the sentiment
	polarity of a sentence towards the target. Therefore, it is desirable to
	integrate the connections between target word and context words when building a
	learning system. In this paper, we develop two target dependent long short-term
	memory (LSTM) models, where target information is automatically taken into
	account. We evaluate our methods on a benchmark dataset from Twitter. Empirical
	results show that modeling sentence representation with standard LSTM does not
	perform well. Incorporating target information into LSTM can significantly
	boost the classification accuracy. The target-dependent LSTM models achieve
	state-of-the-art performance without using a syntactic parser or external
	sentiment lexicons.},
  url       = {http://aclweb.org/anthology/C16-1311}
}

@InProceedings{stede:2016:COLING,
  author    = {Stede, Manfred},
  title     = {Towards assessing depth of argumentation},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3308--3317},
  abstract  = {For analyzing argumentative text, we propose to study the ‘depth’ of
	argumentation as one important component, which we distinguish from argument
	quality. In a pilot study with German newspaper commentary texts, we asked
	students to rate the degree of argumentativeness, and then looked for
	correlations with features of the annotated argumentation structure and the
	rhetorical structure (in terms of RST). The results indicate that the human
	judgements correlate with our operationalization of depth and with certain
	structural features of RST trees.},
  url       = {http://aclweb.org/anthology/C16-1312}
}

@InProceedings{phan-EtAl:2016:COLING,
  author    = {Phan, Sang  and  Miyao, Yusuke  and  Le, Duy-Dinh  and  Satoh, Shin'ichi},
  title     = {Video Event Detection by Exploiting Word Dependencies from Image Captions},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3318--3327},
  abstract  = {Video event detection is a challenging problem in information and multimedia
	retrieval. Different from single action detection, event detection requires a
	richer level of semantic information from video. In order to overcome this
	challenge, existing solutions often represent videos using high level features
	such as concepts. However, concept-based representation can be confusing
	because it does not encode the relationship between concepts. This issue can be
	addressed by exploiting the co-occurrences of the concepts; however, this often
	leads to a huge number of possible combinations. In this paper, we propose
	a new approach to obtain the relationship between concepts by exploiting the
	syntactic dependencies between words in the image captions. The main advantage
	of this approach is that it significantly reduces the number of informative
	combinations between concepts. We conduct extensive experiments to analyze the
	effectiveness of using the new dependency representation for event detection on
	two large-scale TRECVID Multimedia Event Detection 2013 and 2014 datasets.
	Experimental results show that i) Dependency features are more discriminative
	than concept-based features. ii) Dependency features can be combined with our
	current event detection system to further improve the performance. For
	instance, the relative improvement can be as far as 8.6\% on the MEDTEST14 10Ex
	setting.},
  url       = {http://aclweb.org/anthology/C16-1313}
}

@InProceedings{xiao-EtAl:2016:COLING,
  author    = {Xiao, Yang  and  Wang, Yuan  and  Mao, Hangyu  and  Xiao, Zhen},
  title     = {Predicting Restaurant Consumption Level through Social Media Footprints},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3328--3338},
  abstract  = {Accurate prediction of user attributes from social media is valuable for both
	social science analysis and consumer targeting.  In this paper,  we propose a
	systematic method to leverage user online social media content for predicting
	offline restaurant consumption level. We utilize the social login as a bridge
	and construct a dataset of 8,844 users who have been linked across Dianping
	(similar to Yelp) and Sina Weibo. More specifically, we construct consumption
	level ground truth based on users' self-reported spending. We build predictive
	models using both raw features and, especially, latent features, such as topic
	distributions and celebrity clusters. The employed methods demonstrate that
	online social media content has strong predictive power for offline spending.
	Finally, combined with qualitative feature analysis, we present the differences
	in words usage, topic interests and following behavior between different
	consumption level groups.},
  url       = {http://aclweb.org/anthology/C16-1314}
}

@InProceedings{mao-EtAl:2016:COLING,
  author    = {Mao, Xian-Ling  and  Hao, Yi-Jing  and  Zhou, Qiang  and  Yuan, Wen-Qing  and  Yang, Liner  and  Huang, Heyan},
  title     = {A Novel Fast Framework for Topic Labeling Based on Similarity-preserved Hashing},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3339--3348},
  abstract  = {Recently, topic modeling has been widely applied in data mining due to its
	powerful ability. A common, major challenge in applying such topic models to
	other tasks is to accurately interpret the meaning of each topic. Topic
	labeling, as a major interpreting method, has attracted significant attention
	recently. However, most previous works focus only on the effectiveness of
	topic labeling, and less attention has been paid to quickly creating good topic
	descriptors; meanwhile, it is hard to assign labels to newly emerging topics
	with most existing methods. To solve the problems above, in this paper, we
	propose a novel fast topic labeling framework that casts the labeling problem
	as a k-nearest neighbor (KNN) search problem in a probability vector set. Our
	experimental results show that the proposed sequential interleaving method
	based on locality sensitive hashing (LSH) technology is efficient in boosting
	the comparison speed among probability distributions, and the proposed
	framework can generate meaningful labels to interpret topics, including new
	emerging topics.},
  url       = {http://aclweb.org/anthology/C16-1315}
}

@InProceedings{mou-EtAl:2016:COLING,
  author    = {Mou, Lili  and  Song, Yiping  and  Yan, Rui  and  Li, Ge  and  Zhang, Lu  and  Jin, Zhi},
  title     = {Sequence to Backward and Forward Sequences: A Content-Introducing Approach to Generative Short-Text Conversation},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3349--3358},
  abstract  = {Using neural networks to generate replies in human-computer dialogue systems has
	been attracting increasing attention over the past few years. However, the
	performance is not satisfactory: the neural network tends to generate safe,
	universally relevant replies which carry little meaning. In this paper, we
	propose a content-introducing approach to neural network-based generative
	dialogue systems. We first use pointwise mutual information (PMI) to predict a
	noun as a keyword, reflecting the main gist of the reply. We then propose
	seq2BF, a "sequence to backward and forward sequences" model, which generates a
	reply containing the given keyword. Experimental results show that our approach
	significantly outperforms traditional sequence-to-sequence models in terms of
	human evaluation and the entropy measure, and that the predicted keyword can
	appear at an appropriate position in the reply.},
  url       = {http://aclweb.org/anthology/C16-1316}
}

@InProceedings{gervits-eberhard-scheutz:2016:COLING,
  author    = {Gervits, Felix  and  Eberhard, Kathleen  and  Scheutz, Matthias},
  title     = {Disfluent but effective? A quantitative study of disfluencies and conversational moves in team discourse},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3359--3369},
  abstract  = {Situated dialogue systems that interact with humans as part of a team (e.g.,
	robot teammates) need to be able to use information from communication channels
	to gauge the coordination level and effectiveness of the team. Currently, the
	feasibility of this end goal is limited by several gaps in both the empirical
	and computational literature. The purpose of this paper is to address those
	gaps in the following ways: (1) investigate which properties of task-oriented
	discourse correspond with effective performance in human teams, and (2) discuss
	how and to what extent these properties can be utilized in spoken dialogue
	systems. To this end, we analyzed natural language data from a unique corpus of
	spontaneous, task-oriented dialogue (CReST corpus), which was annotated for
	disfluencies and conversational moves. We found that effective teams made more
	self-repair disfluencies and used specific communication strategies to
	facilitate grounding and coordination. Our results indicate that truly robust and
	natural dialogue systems will need to interpret highly disfluent utterances and
	also utilize specific collaborative mechanisms to facilitate grounding. These
	data shed light on effective communication in performance scenarios and
	directly inform the development of robust dialogue systems for situated
	artificial agents.},
  url       = {http://aclweb.org/anthology/C16-1317}
}

@InProceedings{vougiouklis-hare-simperl:2016:COLING,
  author    = {Vougiouklis, Pavlos  and  Hare, Jonathon  and  Simperl, Elena},
  title     = {A Neural Network Approach for Knowledge-Driven Response Generation},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3370--3380},
  abstract  = {We present a novel response generation system. The system assumes the
	hypothesis that participants in a conversation base their response not only on
	previous dialog utterances but also on their background knowledge. Our model is
	based on a Recurrent Neural Network (RNN) that is trained over concatenated
	sequences of comments, a Convolutional Neural Network that is trained over
	Wikipedia sentences and a formulation that couples the two trained embeddings
	in a multimodal space. We create a dataset of aligned Wikipedia sentences and
	sequences of Reddit utterances, which we use to train our model. Given a
	sequence of past utterances and a set of sentences that represent the
	background knowledge, our end-to-end learnable model is able to generate
	context-sensitive and knowledge-driven responses by leveraging the alignment of
	two different data sources. Our approach achieves up to 55% improvement in
	perplexity compared to purely sequential models based on RNNs that are trained
	only on sequences of utterances.},
  url       = {http://aclweb.org/anthology/C16-1318}
}

@InProceedings{poostchi-EtAl:2016:COLING,
  author    = {Poostchi, Hanieh  and  Zare Borzeshi, Ehsan  and  Abdous, Mohammad  and  Piccardi, Massimo},
  title     = {PersoNER: Persian Named-Entity Recognition},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3381--3389},
  abstract  = {Named-Entity Recognition (NER) is still a challenging task for languages with
	low digital resources. The main difficulties arise from the scarcity of
	annotated corpora and the consequent problematic training of an effective NER
	pipeline. To bridge this gap, in this paper we target the Persian language
	that is spoken by a population of over a hundred million people world-wide. We
	first present and provide ArmanPersoNERCorpus, the first manually-annotated
	Persian NER corpus. Then, we introduce PersoNER, an NER pipeline for Persian
	that leverages a word embedding and a sequential max-margin classifier. The
	experimental results show that the proposed approach is capable of achieving
	interesting MUC7 and CoNLL scores while outperforming two alternatives based on
	a CRF and a recurrent neural network.},
  url       = {http://aclweb.org/anthology/C16-1319}
}

@InProceedings{singh-EtAl:2016:COLING2,
  author    = {Singh, Mayank  and  Barua, Barnopriyo  and  Palod, Priyank  and  Garg, Manvi  and  Satapathy, Sidhartha  and  Bushi, Samuel  and  Ayush, Kumar  and  Sai Rohith, Krishna  and  Gamidi, Tulasi  and  Goyal, Pawan  and  Mukherjee, Animesh},
  title     = {OCR++: A Robust Framework For Information Extraction from Scholarly Articles},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3390--3400},
  abstract  = {This paper proposes OCR++, an open-source framework designed for a variety of 
	information extraction tasks from scholarly articles including metadata (title,
	author names, affiliation and e-mail), 
	structure (section headings and body text, table and figure headings, URLs and
	footnotes) and bibliography (citation instances and references).
	We analyze a diverse set of scientific articles written in English to
	understand generic writing patterns and formulate rules to develop this hybrid
	framework.
	Extensive evaluations show that the proposed framework outperforms the existing
	state-of-the-art tools by a large margin in structural information
	extraction along with improved performance in metadata and bibliography
	extraction tasks, both in terms of accuracy (around 50% improvement) and
	processing time (around 52% improvement). 
	A user experience study conducted with the help of 30 researchers reveals that
	the researchers found this system to be very helpful. As an additional
	objective,
	we discuss two novel use cases including automatically extracting links to
	public datasets from the proceedings, which would further accelerate the
	advancement in digital libraries. The result of the framework can be exported
	as a whole into structured TEI-encoded documents. 
	Our framework is accessible online at
	http://www.cnergres.iitkgp.ac.in/OCR++/home/.},
  url       = {http://aclweb.org/anthology/C16-1320}
}

@InProceedings{hazem-morin:2016:COLING,
  author    = {Hazem, Amir  and  Morin, Emmanuel},
  title     = {Efficient Data Selection for Bilingual Terminology Extraction from Comparable Corpora},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3401--3411},
  abstract  = {Comparable corpora are the main alternative to the use of parallel corpora to
	extract bilingual lexicons. Although it is easier to build comparable corpora,
	specialized comparable corpora are often of modest size in comparison with
	corpora issued from the general domain. Consequently, the observations of word
	co-occurrences which are the basis of context-based methods are unreliable. We
	propose in this article to improve word co-occurrences of specialized
	comparable corpora and thus context representation by using general-domain
	data. This idea, which has already been used in the machine translation task for
	more than a decade, is not straightforward for the task of bilingual lexicon
	extraction from specific-domain comparable corpora. We go against the
	mainstream of this task where many studies support the idea that adding
	out-of-domain documents decreases the quality of lexicons. Our empirical
	evaluation shows the advantages of this approach which induces a significant
	gain in the accuracy of extracted lexicons.},
  url       = {http://aclweb.org/anthology/C16-1321}
}

@InProceedings{ljubevsic-samardzic-derungs:2016:COLING,
  author    = {Ljube\v{s}i\'{c}, Nikola  and  Samardzic, Tanja  and  Derungs, Curdin},
  title     = {TweetGeo - A Tool for Collecting, Processing and Analysing Geo-encoded Linguistic Data},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3412--3421},
  abstract  = {In this paper we present a newly developed tool that enables researchers
	interested in spatial variation of language to define a geographic perimeter of
	interest, collect data from the Twitter streaming API published in that
	perimeter, filter the obtained data by language and country, define and extract
	variables of interest and analyse the extracted variables by one spatial
	statistic and two spatial visualisations. We showcase the tool on the area and
	a selection of languages spoken in the former Yugoslavia. By defining the
	perimeter, languages and a series of linguistic variables of interest we
	demonstrate the data collection, processing and analysis capabilities of the
	tool.},
  url       = {http://aclweb.org/anthology/C16-1322}
}

@InProceedings{espinosaanke-EtAl:2016:COLING,
  author    = {Espinosa Anke, Luis  and  Camacho-Collados, Jose  and  Rodr\'{i}guez-Fern\'{a}ndez, Sara  and  Saggion, Horacio  and  Wanner, Leo},
  title     = {Extending WordNet with Fine-Grained Collocational Information via Supervised Distributional Learning},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3422--3432},
  abstract  = {WordNet is probably the best known lexical resource in Natural Language
	Processing. While it is widely regarded as a high quality repository of
	concepts and semantic relations, updating and extending it manually is costly.
	One important type of relation which could potentially add enormous value to
	WordNet is the inclusion of collocational information, which is paramount in
	tasks such as Machine Translation, Natural Language Generation and Second
	Language Learning. In this paper, we present ColWordNet (CWN), an extended
	WordNet version with fine-grained collocational information, automatically
	introduced thanks to a method exploiting linear relations between analogous
	sense-level embeddings spaces. We perform both intrinsic and extrinsic
	evaluations, and release CWN for the use and scrutiny of the community.},
  url       = {http://aclweb.org/anthology/C16-1323}
}

@InProceedings{alkhatib-EtAl:2016:COLING,
  author    = {Al Khatib, Khalid  and  Wachsmuth, Henning  and  Kiesel, Johannes  and  Hagen, Matthias  and  Stein, Benno},
  title     = {A News Editorial Corpus for Mining Argumentation Strategies},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3433--3443},
  abstract  = {Many argumentative texts, and news editorials in particular, follow a specific
	strategy to persuade their readers of some opinion or attitude. This includes
	decisions such as when to tell an anecdote or where to support an assumption
	with statistics, which is reflected by the composition of different types of
	argumentative discourse units in a text. While several argument mining corpora
	have recently been published, they do not allow the study of argumentation
	strategies due to incomplete or coarse-grained unit annotations. This paper
	presents a novel corpus with 300 editorials from three diverse news portals
	that provides the basis for mining argumentation strategies. Each unit in all
	editorials has been assigned one of six types by three annotators with a high
	Fleiss’ Kappa agreement of 0.56. We investigate various challenges of the
	annotation process and we conduct a first corpus analysis. Our results reveal
	different strategies across the news portals, exemplifying the benefit of
	studying editorials—a so far underresourced text genre in argument mining.},
  url       = {http://aclweb.org/anthology/C16-1324}
}

@InProceedings{sulubacak-EtAl:2016:COLING,
  author    = {Sulubacak, Umut  and  Gokirmak, Memduh  and  Tyers, Francis  and  \c{C}\"{o}ltekin, \c{C}a\u{g}rı  and  Nivre, Joakim  and  Eryi\u{g}it, G\"{u}l\c{s}en},
  title     = {Universal Dependencies for Turkish},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3444--3454},
  abstract  = {The Universal Dependencies (UD) project was conceived after the substantial
	recent interest in unifying annotation schemes across languages. With its own
	annotation principles and abstract inventory for parts of speech,
	morphosyntactic features and dependency relations, UD aims to facilitate
	multilingual parser development, cross-lingual learning, and parsing research
	from a language typology perspective. This paper presents the Turkish IMST-UD
	Treebank, the first Turkish treebank to be in a UD release. The IMST-UD
	Treebank was automatically converted from the IMST Treebank, which was also
	recently released. We describe this conversion procedure in detail, complete
	with mapping tables. We also present our evaluation of the parsing performances
	of both versions of the IMST Treebank. Our findings suggest that the UD
	framework is at least as viable for Turkish as the original annotation
	framework of the IMST Treebank.},
  url       = {http://aclweb.org/anthology/C16-1325}
}

@InProceedings{eskander-EtAl:2016:COLING,
  author    = {Eskander, Ramy  and  Habash, Nizar  and  Rambow, Owen  and  Pasha, Arfath},
  title     = {Creating Resources for Dialectal Arabic from a Single Annotation: A Case Study on Egyptian and Levantine},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3455--3465},
  abstract  = {Arabic dialects present a special problem for natural language processing
	because there are few resources, they have no standard orthography, and
	have not been studied much.  However, as more and more written dialectal
	Arabic is found in social media, NLP for Arabic dialects becomes an
	important goal.  We present a methodology for creating a morphological
	analyzer and a morphological tagger for dialectal Arabic, and we illustrate
	it on Egyptian and Levantine Arabic.  To our knowledge, these are the first
	analyzer and tagger for Levantine.},
  url       = {http://aclweb.org/anthology/C16-1326}
}

@InProceedings{akbik-guan-li:2016:COLING,
  author    = {Akbik, Alan  and  Guan, Xinyu  and  Li, Yunyao},
  title     = {Multilingual Aliasing for Auto-Generating Proposition Banks},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3466--3474},
  abstract  = {Semantic Role Labeling (SRL) is the task of identifying the predicate-argument
	structure in sentences with semantic frame and role labels. For the English
	language, the Proposition Bank provides both a lexicon of all possible semantic
	frames and large amounts of labeled training data. In order to expand SRL
	beyond English, previous work investigated automatic approaches based on
	parallel corpora to automatically generate Proposition Banks for new target
	languages (TLs). However, this approach heuristically produces the frame
	lexicon from word alignments, leading to a range of lexicon-level errors and
	inconsistencies. To address these issues, we propose to manually alias TL verbs
	to existing English frames. For instance, the German verb drehen may evoke
	several meanings, including “turn something” and “film something”.
	Accordingly, we alias the former to the frame TURN.01 and the latter to a group
	of frames that includes FILM.01 and SHOOT.03. We execute a large-scale manual
	aliasing effort for three target languages and apply the new lexicons to
	automatically generate large Proposition Banks for Chinese, French and German
	with manually curated frames. We present a detailed evaluation in which we find
	that our proposed approach significantly increases the quality and consistency
	of the generated Proposition Banks. We release these resources to the research
	community.},
  url       = {http://aclweb.org/anthology/C16-1327}
}

@InProceedings{mortensen-EtAl:2016:COLING,
  author    = {Mortensen, David R.  and  Littell, Patrick  and  Bharadwaj, Akash  and  Goyal, Kartik  and  Dyer, Chris  and  Levin, Lori},
  title     = {PanPhon: A Resource for Mapping IPA Segments to Articulatory Feature Vectors},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3475--3484},
  abstract  = {This paper contributes to a growing body of evidence that---when coupled with
	appropriate machine-learning techniques---linguistically motivated,
	information-rich representations can outperform one-hot encodings of linguistic
	data. In particular, we show that phonological features outperform
	character-based models. PanPhon is a database relating over 5,000 IPA segments
	to 21 subsegmental articulatory features. We show that this database boosts
	performance in various NER-related tasks. Phonologically aware, neural CRF
	models built on PanPhon features are able to perform better on monolingual
	Spanish and Turkish NER tasks than character-based models. They have also been
	shown to work well in transfer models (as between Uzbek and Turkish). PanPhon
	features also contribute measurably to Orthography-to-IPA conversion tasks.},
  url       = {http://aclweb.org/anthology/C16-1328}
}

@InProceedings{zhou-EtAl:2016:COLING2,
  author    = {Zhou, Peng  and  Qi, Zhenyu  and  Zheng, Suncong  and  Xu, Jiaming  and  Bao, Hongyun  and  Xu, Bo},
  title     = {Text Classification Improved by Integrating Bidirectional LSTM with Two-dimensional Max Pooling},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3485--3495},
  abstract  = {Recurrent Neural Network (RNN) is one of the most popular architectures used in
	Natural Language Processing (NLP) tasks because its recurrent structure is
	very suitable to process variable-length text. RNN can utilize distributed
	representations of words by first converting the tokens comprising each text
	into vectors, which form a matrix. This matrix includes two dimensions: the
	time-step dimension and the feature vector dimension. Then most existing models
	usually utilize one-dimensional (1D) max pooling operation or attention-based
	operation only on the time-step dimension to obtain a fixed-length vector. 
	However, the features on the feature vector dimension are not mutually
	independent, and simply applying 1D pooling operation over the time-step
	dimension independently may destroy the structure of the feature
	representation. On the other hand, applying two-dimensional (2D) pooling
	operation over the two dimensions may sample more meaningful features for
	sequence modeling tasks. To integrate the features on both dimensions of the
	matrix, this paper explores applying 2D max pooling operation to obtain a
	fixed-length representation of the text. This paper also utilizes 2D
	convolution to sample more meaningful information of the matrix. Experiments
	are conducted on six text classification tasks, including sentiment analysis,
	question classification, subjectivity classification and newsgroup
	classification. Compared with the state-of-the-art models, the proposed models
	achieve excellent performance on 4 out of 6 tasks. Specifically, one of the
	proposed models achieves the highest accuracy on the Stanford Sentiment Treebank binary
	classification and fine-grained classification tasks.},
  url       = {http://aclweb.org/anthology/C16-1329}
}

@InProceedings{postma-izquierdobevia-vossen:2016:COLING,
  author    = {Postma, Marten  and  Izquierdo Bevia, Ruben  and  Vossen, Piek},
  title     = {More is not always better: balancing sense distributions for all-words Word Sense Disambiguation},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3496--3506},
  abstract  = {Current Word Sense Disambiguation systems show extremely poor performance on
	low-frequency senses, which is mainly caused by the difference in sense
	distributions between training and test data. The main focus in tackling this
	problem has been on acquiring more data or selecting a single predominant sense
	and not necessarily on the meta properties of the data itself. We demonstrate
	that these properties, such as the volume, provenance, and balancing, play an
	important role with respect to system performance. In this paper, we describe a
	set of experiments to analyze these meta properties in the framework of a
	state-of-the-art WSD system when evaluated on the SemEval-2013 English
	all-words dataset. We show that volume and provenance are indeed important, but
	that approximating the perfect balancing of the selected training data leads to
	an improvement of 21 points and exceeds state-of-the-art systems by 14 points
	while using only simple features. We therefore conclude that unsupervised
	acquisition of training data should be guided by strategies aimed at matching
	meta properties.},
  url       = {http://aclweb.org/anthology/C16-1330}
}

@InProceedings{eger-hoenen-mehler:2016:COLING,
  author    = {Eger, Steffen  and  Hoenen, Armin  and  Mehler, Alexander},
  title     = {Language classification from bilingual word embedding graphs},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3507--3518},
  abstract  = {We study the role of the second language in bilingual word embeddings in
	monolingual semantic evaluation tasks. We find strongly and weakly positive
	correlations between down-stream task performance and second language
	similarity to the target language. Additionally, we show how bilingual word
	embeddings can be employed for the task of semantic language classification and
	that joint semantic spaces vary in meaningful ways across second languages. Our
	results support the hypothesis that semantic language similarity is influenced
	by both structural similarity as well as geography/contact.},
  url       = {http://aclweb.org/anthology/C16-1331}
}

@InProceedings{drozd-gladkova-matsuoka:2016:COLING,
  author    = {Drozd, Aleksandr  and  Gladkova, Anna  and  Matsuoka, Satoshi},
  title     = {Word Embeddings, Analogies, and Machine Learning: Beyond king - man + woman = queen},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3519--3530},
  abstract  = {Solving word analogies became one of the most popular benchmarks for word
	embeddings on the assumption that linear relations between word pairs (such as
	$king$:$man$~:: $woman$:$queen$) are indicative of the quality of the
	embedding. We question this assumption by showing that the information not
	detected by linear offset may still be recoverable by a more sophisticated
	search method, and thus is actually encoded in the embedding.
	The general problem with linear offset is its sensitivity to the idiosyncrasies
	of individual words. We show that simple averaging over multiple word pairs
	improves over the state-of-the-art. A further improvement in accuracy (up to
	{30\%} for some embeddings and relations) is achieved by combining cosine
	similarity with an estimation of the extent to which a candidate answer belongs
	to the correct word class. In addition to this practical contribution, this
	work highlights the problem of the interaction between word embeddings and
	analogy retrieval algorithms, and its implications for the evaluation of word
	embeddings and the use of analogies in extrinsic tasks.},
  url       = {http://aclweb.org/anthology/C16-1332}
}

@InProceedings{bjerva-plank-bos:2016:COLING,
  author    = {Bjerva, Johannes  and  Plank, Barbara  and  Bos, Johan},
  title     = {Semantic Tagging with Deep Residual Networks},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3531--3541},
  abstract  = {We propose a novel semantic tagging task, semtagging, tailored for the purpose
	of multilingual semantic parsing, and present the first tagger using deep
	residual networks (ResNets). 
	Our tagger uses both word and character representations, and includes a novel
	residual bypass architecture. 
	We evaluate the tagset both intrinsically on the new task of semantic tagging,
	as well as on Part-of-Speech (POS) tagging. 
	Our system, consisting of a ResNet and an auxiliary loss function predicting
	our semantic tags, significantly outperforms prior results on English Universal
	Dependencies POS tagging (95.71% accuracy on UD v1.2 and 95.67% accuracy on UD
	v1.3).},
  url       = {http://aclweb.org/anthology/C16-1333}
}

@InProceedings{virk-muller-conrath:2016:COLING,
  author    = {Virk, Shafqat Mumtaz  and  Muller, Philippe  and  Conrath, Juliette},
  title     = {A Supervised Approach for Enriching the Relational Structure of Frame Semantics in FrameNet},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3542--3552},
  abstract  = {Frame semantics is a theory of linguistic meanings, and is considered to be a
	useful framework for shallow semantic analysis of natural language. FrameNet,
	which is based on frame semantics, is a popular lexical semantic resource. In
	addition to providing a set of core semantic frames and their frame elements,
	FrameNet also provides relations between those frames (hence providing a
	network of frames i.e. FrameNet). We address here the limited coverage of the
	network of conceptual relations between frames in FrameNet, which has
	previously been pointed out by others. We present a supervised model using rich
	features from three different sources: structural features from the existing
	FrameNet network, information from the WordNet relations between synsets
	projected into semantic frames, and corpus-collected lexical associations. We
	show large improvements over baselines consisting of each of the three groups
	of features in isolation. We then use this model to select frame pairs as
	candidate relations, and perform evaluation on a sample with good precision.},
  url       = {http://aclweb.org/anthology/C16-1334}
}

@InProceedings{dang-EtAl:2016:COLING,
  author    = {Dang, Anh  and  Moh'd, Abidalrahman  and  Islam, Aminul  and  Minghim, Rosane  and  Smit, Michael  and  Milios, Evangelos},
  title     = {Reddit Temporal N-gram Corpus and its Applications on Paraphrase and Semantic Similarity in Social Media using a Topic-based Latent Semantic Analysis},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3553--3564},
  abstract  = {This paper introduces a new large-scale n-gram corpus that is created
	specifically from social media text. Two distinguishing characteristics of this
	corpus are its monthly temporal attribute and that it is created from 1.65
	billion comments of user-generated text in Reddit. The usefulness of this
	corpus is exemplified and evaluated by a novel Topic-based Latent Semantic
	Analysis (TLSA) algorithm. The experimental results show that unsupervised TLSA
	outperforms all the state-of-the-art unsupervised and semi-supervised methods
	in SEMEVAL 2015: paraphrase and semantic similarity in Twitter tasks.},
  url       = {http://aclweb.org/anthology/C16-1335}
}

@InProceedings{garrido-gutierrez:2016:COLING,
  author    = {Garrido, Camilo  and  Gutierrez, Claudio},
  title     = {Dictionaries as Networks: Identifying the graph structure of Ogden’s Basic English},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3565--3576},
  abstract  = {We study the network structure underlying dictionaries. We systematize the
	properties of such networks and show their relevance for linguistics. As a case
	study, we apply this technique to identify the graph structure of Ogden's
	Basic English. We show that it constitutes a strong core of the English
	language network and that classic centrality measures fail to capture this set
	of words.},
  url       = {http://aclweb.org/anthology/C16-1336}
}

@InProceedings{komninos-manandhar:2016:COLING,
  author    = {Komninos, Alexandros  and  Manandhar, Suresh},
  title     = {Structured Generative Models of Continuous Features for Word Sense Induction},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = {December},
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3577--3587},
  abstract  = {We propose a structured generative latent variable model that integrates
	information from multiple contextual representations for Word Sense Induction.
	Our approach jointly models global lexical, local lexical and dependency
	syntactic context. Each context type is associated with a latent variable and
	the three types of variables share a hierarchical structure. We use skip-gram
	based word and dependency context embeddings to construct all three types of
	representations, reducing the total number of parameters to be estimated and
	enabling better generalization. We describe an EM algorithm to efficiently
	estimate model parameters and use the Integrated Complete Likelihood criterion
	to automatically estimate the number of senses. Our model achieves
	state-of-the-art results on the SemEval-2010 and SemEval-2013 Word Sense
	Induction datasets.},
  url       = {http://aclweb.org/anthology/C16-1337}
}

