<?xml version="1.0" encoding="UTF-8" ?>
<volume id="E17">
  <paper id="2000">
    <title>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</title>
    <editor>Mirella Lapata</editor>
    <editor>Phil Blunsom</editor>
    <editor>Alexander Koller</editor>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <url>http://www.aclweb.org/anthology/E17-2</url>
    <bibtype>book</bibtype>
    <bibkey>EACLshort:2017</bibkey>
  </paper>

  <paper id="2001">
    <title>Multilingual Back-and-Forth Conversion between Content and Function Head for Easy Dependency Parsing</title>
    <author><first>Ryosuke</first><last>Kohita</last></author>
    <author><first>Hiroshi</first><last>Noji</last></author>
    <author><first>Yuji</first><last>Matsumoto</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>1&#8211;7</pages>
    <url>http://www.aclweb.org/anthology/E17-2001</url>
    <abstract>Universal Dependencies (UD) is becoming a standard annotation scheme
	cross-linguistically, but it is argued that this scheme centering on content
	words is harder to parse than the conventional one centering on function words.
	 To improve the parsability of UD, we propose a back-and-forth conversion
	algorithm, in which we preprocess the training treebank to increase
	parsability, and reconvert the parser outputs to follow the UD scheme as a
	postprocess.
	 We show that this technique consistently improves LAS across languages even
	with a state-of-the-art parser, in particular on core dependency arcs such as
	nominal modifier.
	 We also provide an in-depth analysis to understand why our method increases
	parsability.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>kohita-noji-matsumoto:2017:EACLshort</bibkey>
  </paper>

  <paper id="2002">
    <title>URIEL and lang2vec: Representing languages as typological, geographical, and phylogenetic vectors</title>
    <author><first>Patrick</first><last>Littell</last></author>
    <author><first>David R.</first><last>Mortensen</last></author>
    <author><first>Ke</first><last>Lin</last></author>
    <author><first>Katherine</first><last>Kairis</last></author>
    <author><first>Carlisle</first><last>Turner</last></author>
    <author><first>Lori</first><last>Levin</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>8&#8211;14</pages>
    <url>http://www.aclweb.org/anthology/E17-2002</url>
    <abstract>We introduce the URIEL knowledge base for massively multilingual NLP and the
	lang2vec utility, which provides information-rich vector identifications of
	languages drawn from typological, geographical, and phylogenetic databases and
	normalized to have straightforward and consistent formats, naming, and
	semantics.  The goal of URIEL and lang2vec is to enable multilingual NLP,
	especially on less-resourced languages and make possible types of experiments
	(especially but not exclusively related to NLP tasks) that are otherwise
	difficult or impossible due to the sparsity and incommensurability of the data
	sources.  lang2vec vectors have been shown to reduce perplexity in multilingual
	language modeling, when compared to one-hot language identification vectors.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>littell-EtAl:2017:EACLshort</bibkey>
  </paper>

  <paper id="2003">
    <title>An experimental analysis of Noise-Contrastive Estimation: the noise distribution matters</title>
    <author><first>Matthieu</first><last>Labeau</last></author>
    <author><first>Alexandre</first><last>Allauzen</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>15&#8211;20</pages>
    <url>http://www.aclweb.org/anthology/E17-2003</url>
    <abstract>Noise Contrastive Estimation (NCE) is a learning procedure that is regularly
	used to train neural language models, since it avoids the computational
	bottleneck caused by the output softmax. In this paper, we attempt to explain
	some of the weaknesses of this objective function, and to draw directions for
	further developments. Experiments on a small task show the issues raised by an
	unigram noise distribution, and that a context dependent noise distribution,
	such as the bigram distribution, can solve these issues and provide stable and
	data-efficient learning.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>labeau-allauzen:2017:EACLshort</bibkey>
  </paper>

  <paper id="2004">
    <title>Robust Training under Linguistic Adversity</title>
    <author><first>Yitong</first><last>Li</last></author>
    <author><first>Trevor</first><last>Cohn</last></author>
    <author><first>Timothy</first><last>Baldwin</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>21&#8211;27</pages>
    <url>http://www.aclweb.org/anthology/E17-2004</url>
    <abstract>Deep neural networks have achieved remarkable results across many language
	processing tasks, however they have been shown to be susceptible to overfitting
	and highly sensitive to noise, including adversarial attacks. In this work, we
	propose a linguistically-motivated  approach for training robust models based
	on exposing the model to corrupted text examples at training time. We consider
	several flavours of linguistically plausible corruption, include lexical
	semantic and syntactic methods. Empirically, we evaluate our method with a
	convolutional neural model across a range of sentiment analysis datasets.
	Compared with a baseline and the dropout method, our method achieves better
	overall performance.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>li-cohn-baldwin:2017:EACLshort</bibkey>
  </paper>

  <paper id="2005">
    <title>Using Twitter Language to Predict the Real Estate Market</title>
    <author><first>Mohammadzaman</first><last>Zamani</last></author>
    <author><first>H. Andrew</first><last>Schwartz</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>28&#8211;33</pages>
    <url>http://www.aclweb.org/anthology/E17-2005</url>
    <abstract>We explore whether social media can provide a window into community real
	estate -foreclosure rates and price changes- beyond that of traditional
	economic and demographic variables. We find language use in Twitter not only
	predicts real estate outcomes as well as traditional variables across counties,
	but that including Twitter language in traditional models leads to a
	significant improvement (e.g. from Pearson r = .50 to r = .59 for price
	changes). We overcome the challenge of the relative sparsity and noise in
	Twitter language variables by showing that training on the residual error of
	the traditional models leads to more accurate overall assessments. Finally, we
	discover that it is Twitter language related to business (e.g. 'company',
	'marketing') and technology (e.g. 'technology', 'internet'), among
	others, that yield predictive power over economics.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>zamani-schwartz:2017:EACLshort</bibkey>
  </paper>

  <paper id="2006">
    <title>Lexical Simplification with Neural Ranking</title>
    <author><first>Gustavo</first><last>Paetzold</last></author>
    <author><first>Lucia</first><last>Specia</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>34&#8211;40</pages>
    <url>http://www.aclweb.org/anthology/E17-2006</url>
    <abstract>We present a new Lexical Simplification approach that exploits Neural Networks
	to learn substitutions from the Newsela corpus - a large set of professionally
	produced simplifications. We extract candidate substitutions by combining the
	Newsela corpus with a retrofitted context-aware word embeddings model and rank
	them using a new neural regression model that learns rankings from annotated
	data. This strategy leads to the highest Accuracy, Precision and F1 scores to
	date in standard datasets for the task.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>paetzold-specia:2017:EACLshort</bibkey>
  </paper>

  <paper id="2007">
    <title>The limits of automatic summarisation according to ROUGE</title>
    <author><first>Natalie</first><last>Schluter</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>41&#8211;45</pages>
    <url>http://www.aclweb.org/anthology/E17-2007</url>
    <abstract>This paper discusses some central caveats of summarisation, incurred in the use
	of the ROUGE metric for evaluation, with respect to optimal solutions. The task
	is NP-hard, of which we give the first proof.  Still, as we show
	empirically for three central benchmark datasets for the task, greedy
	algorithms empirically seem to perform optimally according to the metric.
	Additionally, overall quality assurance is problematic: there is no natural
	upper bound on the quality of summarisation systems, and even humans are
	excluded from performing optimal summarisation.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>schluter:2017:EACLshort</bibkey>
  </paper>

  <paper id="2008">
    <title>Crowd-Sourced Iterative Annotation for Narrative Summarization Corpora</title>
    <author><first>Jessica</first><last>Ouyang</last></author>
    <author><first>Serina</first><last>Chang</last></author>
    <author><first>Kathy</first><last>McKeown</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>46&#8211;51</pages>
    <url>http://www.aclweb.org/anthology/E17-2008</url>
    <abstract>We present an iterative annotation process for producing aligned, parallel
	corpora of abstractive and extractive summaries for narrative. Our approach
	uses a combination of trained annotators and crowd-sourcing, allowing us to
	elicit human-generated summaries and alignments quickly and at low cost. We use
	crowd-sourcing to annotate aligned phrases with the text-to-text generation
	techniques needed to transform each phrase into the other. We apply this
	process to a corpus of 476 personal narratives, which we make available on the
	Web.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>ouyang-chang-mckeown:2017:EACLshort</bibkey>
  </paper>

  <paper id="2009">
    <title>Broad Context Language Modeling as Reading Comprehension</title>
    <author><first>Zewei</first><last>Chu</last></author>
    <author><first>Hai</first><last>Wang</last></author>
    <author><first>Kevin</first><last>Gimpel</last></author>
    <author><first>David</first><last>McAllester</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>52&#8211;57</pages>
    <url>http://www.aclweb.org/anthology/E17-2009</url>
    <abstract>Progress in text understanding has been driven by large datasets that test
	particular capabilities, like recent datasets for reading comprehension
	(Hermann et al., 2015). We focus here on the LAMBADA dataset (Paperno et al.,
	2016), a word prediction task requiring broader context than the immediate
	sentence. We view LAMBADA as a reading comprehension problem and apply
	comprehension models based on neural networks. Though these models are
	constrained to choose a word from the context, they improve the state of the
	art on LAMBADA from 7.3% to 49%. We analyze 100 instances, finding that neural
	network readers perform well in cases that involve selecting a name from the
	context based on dialogue or discourse cues but struggle when coreference
	resolution or external knowledge is needed.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>chu-EtAl:2017:EACLshort</bibkey>
  </paper>

  <paper id="2010">
    <title>Detecting negation scope is easy, except when it isn't</title>
    <author><first>Federico</first><last>Fancellu</last></author>
    <author><first>Adam</first><last>Lopez</last></author>
    <author><first>Bonnie</first><last>Webber</last></author>
    <author><first>Hangfeng</first><last>He</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>58&#8211;63</pages>
    <url>http://www.aclweb.org/anthology/E17-2010</url>
    <abstract>Several corpora have been annotated with negation scope&#8212;the set of words
	whose meaning is negated by a cue like the word &#x201c;not&#x201d;&#8212;leading to the
	development of classifiers that detect negation scope with high accuracy. We
	show that for nearly all of these corpora, this high accuracy can be attributed
	to a single fact:  they frequently annotate negation scope as a single span of
	text delimited by punctuation. For negation scopes not of this form, detection
	accuracy is low and under-sampling the easy training examples does not
	substantially improve accuracy. We demonstrate that this is partly an artifact
	of annotation guidelines, and we argue that future negation scope annotation
	efforts should focus on these more difficult cases.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>fancellu-EtAl:2017:EACLshort</bibkey>
  </paper>

  <paper id="2011">
    <title>MT/IE: Cross-lingual Open Information Extraction with Neural Sequence-to-Sequence Models</title>
    <author><first>Sheng</first><last>Zhang</last></author>
    <author><first>Kevin</first><last>Duh</last></author>
    <author><first>Benjamin</first><last>Van Durme</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>64&#8211;70</pages>
    <url>http://www.aclweb.org/anthology/E17-2011</url>
    <abstract>Cross-lingual information extraction is the task of distilling facts from
	foreign language (e.g. Chinese text) into representations in another language
	that is preferred by the user (e.g. English tuples). Conventional pipeline
	solutions decompose the task as machine translation followed by information
	extraction (or vice versa). We propose a joint solution with a neural sequence
	model, and show that it outperforms the pipeline in a cross-lingual open
	information extraction setting by 1-4 BLEU and 0.5-0.8 F1.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>zhang-duh-vandurme:2017:EACLshort</bibkey>
  </paper>

  <paper id="2012">
    <title>Learning to Negate Adjectives with Bilinear Models</title>
    <author><first>Laura</first><last>Rimell</last></author>
    <author><first>Amandla</first><last>Mabona</last></author>
    <author><first>Luana</first><last>Bulat</last></author>
    <author><first>Douwe</first><last>Kiela</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>71&#8211;78</pages>
    <url>http://www.aclweb.org/anthology/E17-2012</url>
    <abstract>We learn a mapping that negates adjectives by predicting an adjective's antonym
	in an arbitrary word embedding model. We show that both linear models and
	neural networks improve on this task when they have access to a vector
	representing the semantic domain of the input word, e.g. a centroid of
	temperature words when predicting the antonym of 'cold'. We introduce a
	continuous class-conditional bilinear neural network which is able to negate
	adjectives with high precision.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>rimell-EtAl:2017:EACLshort</bibkey>
  </paper>

  <paper id="2013">
    <title>Instances and concepts in distributional space</title>
    <author><first>Gemma</first><last>Boleda</last></author>
    <author><first>Abhijeet</first><last>Gupta</last></author>
    <author><first>Sebastian</first><last>Pad&#243;</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>79&#8211;85</pages>
    <url>http://www.aclweb.org/anthology/E17-2013</url>
    <abstract>Instances (&#x201c;Mozart&#x201d;) are ontologically distinct from concepts or classes
	(&#x201c;composer&#x201d;). Natural language encompasses both, but instances have received
	comparatively little attention in distributional semantics. Our results show
	that instances and concepts differ in their distributional properties. We also
	establish that instantiation detection (&#x201c;Mozart &#8211; composer&#x201d;) is generally
	easier than hypernymy detection (&#x201c;chemist &#8211; scientist&#x201d;), and that results on
	the influence of input representation do not transfer from hyponymy to
	instantiation.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>boleda-gupta-pado:2017:EACLshort</bibkey>
  </paper>

  <paper id="2014">
    <title>Is this a Child, a Girl or a Car? Exploring the Contribution of Distributional Similarity to Learning Referential Word Meanings</title>
    <author><first>Sina</first><last>Zarrie&#223;</last></author>
    <author><first>David</first><last>Schlangen</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>86&#8211;91</pages>
    <url>http://www.aclweb.org/anthology/E17-2014</url>
    <abstract>There has recently been a lot of work trying to use images of referents of
	words for improving vector space meaning representations  derived from text. We
	investigate the opposite direction, as it were, trying to improve visual word
	predictors that identify objects in images, by exploiting distributional
	similarity information during training. We show that for certain words (such as
	entry-level nouns or hypernyms), we can indeed learn better referential word
	meanings by taking into account their semantic similarity to other words. For
	other words, there is no or even a detrimental effect, compared to a learning
	setup that presents even semantically related objects as negative instances.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>zarriess-schlangen:2017:EACLshort</bibkey>
  </paper>

  <paper id="2015">
    <title>The Semantic Proto-Role Linking Model</title>
    <author><first>Aaron Steven</first><last>White</last></author>
    <author><first>Kyle</first><last>Rawlins</last></author>
    <author><first>Benjamin</first><last>Van Durme</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>92&#8211;98</pages>
    <url>http://www.aclweb.org/anthology/E17-2015</url>
    <abstract>We propose the semantic proto-role linking model, which jointly induces both
	predicate-specific semantic roles and predicate-general semantic proto-roles
	based on semantic proto-role property likelihood judgments. We use this model
	to empirically evaluate Dowty's thematic proto-role linking theory.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>white-rawlins-vandurme:2017:EACLshort</bibkey>
  </paper>

  <paper id="2016">
    <title>The Language of Place: Semantic Value from Geospatial Context</title>
    <author><first>Anne</first><last>Cocos</last></author>
    <author><first>Chris</first><last>Callison-Burch</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>99&#8211;104</pages>
    <url>http://www.aclweb.org/anthology/E17-2016</url>
    <abstract>There is a relationship between what we say and where we say it. Word
	embeddings are usually trained assuming that semantically-similar words occur
	within the same textual contexts. We investigate the extent to which
	semantically-similar words occur within the same geospatial contexts. We enrich
	a corpus of geolocated Twitter posts with physical data derived from Google
	Places and OpenStreetMap, and train word embeddings using the resulting
	geospatial contexts. Intrinsic evaluation of the resulting vectors shows that
	geographic context alone does provide useful information about semantic
	relatedness.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>cocos-callisonburch:2017:EACLshort</bibkey>
  </paper>

  <paper id="2017">
    <title>Are Emojis Predictable?</title>
    <author><first>Francesco</first><last>Barbieri</last></author>
    <author><first>Miguel</first><last>Ballesteros</last></author>
    <author><first>Horacio</first><last>Saggion</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>105&#8211;111</pages>
    <url>http://www.aclweb.org/anthology/E17-2017</url>
    <abstract>Emojis are ideograms which are naturally combined with plain text to visually
	complement or condense the meaning of a message. Despite being widely used in
	social media, their underlying semantics have received little attention from a
	Natural Language Processing standpoint. 
	In this paper, we investigate the relation between words and emojis, studying
	the novel task of predicting which emojis are evoked by text-based tweet
	messages.  We train several models based on Long Short-Term Memory networks
	(LSTMs) in this task.                                                 
	Our experimental results show that our neural model outperforms a baseline as
	well as humans solving the same task, suggesting that computational models are
	able to better capture the underlying semantics of emojis.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>barbieri-ballesteros-saggion:2017:EACLshort</bibkey>
  </paper>

  <paper id="2018">
    <title>A Rich Morphological Tagger for English: Exploring the Cross-Linguistic Tradeoff Between Morphology and Syntax</title>
    <author><first>Christo</first><last>Kirov</last></author>
    <author><first>John</first><last>Sylak-Glassman</last></author>
    <author><first>Rebecca</first><last>Knowles</last></author>
    <author><first>Ryan</first><last>Cotterell</last></author>
    <author><first>Matt</first><last>Post</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>112&#8211;117</pages>
    <url>http://www.aclweb.org/anthology/E17-2018</url>
    <abstract>A traditional claim in linguistics is that all human languages are
	  equally expressive&#8212;able to convey the same wide range of meanings.
	  Morphologically rich languages, such as Czech, rely on overt
	  inflectional and derivational morphology to convey many semantic
	  distinctions.  Languages with comparatively limited morphology, such
	  as English, should be able to accomplish the same using a
	  combination of syntactic and contextual cues.  We capitalize on this
	  idea by training a tagger for English that uses syntactic features
	  obtained by automatic parsing to recover complex morphological tags
	  projected from Czech.  The high accuracy of the resulting model
	  provides quantitative confirmation of the underlying linguistic
	  hypothesis of equal expressivity, and bodes well for future
	  improvements in downstream HLT tasks including machine translation.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>kirov-EtAl:2017:EACLshort</bibkey>
  </paper>

  <paper id="2019">
    <title>Context-Aware Prediction of Derivational Word-forms</title>
    <author><first>Ekaterina</first><last>Vylomova</last></author>
    <author><first>Ryan</first><last>Cotterell</last></author>
    <author><first>Timothy</first><last>Baldwin</last></author>
    <author><first>Trevor</first><last>Cohn</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>118&#8211;124</pages>
    <url>http://www.aclweb.org/anthology/E17-2019</url>
    <abstract>Derivational morphology is a fundamental and complex characteristic of
	language.
	In this paper we propose a new task of predicting the derivational form
	of a given base-form lemma that is appropriate for a given context.
	We present an encoder-decoder style neural network to produce a
	derived form character-by-character, based on its corresponding
	character-level representation of the base form and the context. 
	We demonstrate that our model is able to generate valid context-sensitive 
	derivations from known base forms, but is less accurate under lexicon agnostic
	setting.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>vylomova-EtAl:2017:EACLshort</bibkey>
  </paper>

  <paper id="2020">
    <title>Comparing Character-level Neural Language Models Using a Lexical Decision Task</title>
    <author><first>Ga&#235;l</first><last>Le Godais</last></author>
    <author><first>Tal</first><last>Linzen</last></author>
    <author><first>Emmanuel</first><last>Dupoux</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>125&#8211;130</pages>
    <url>http://www.aclweb.org/anthology/E17-2020</url>
    <abstract>What is the information captured by neural network models of language? We
	address this question in the case of character-level recurrent neural language
	models. These models do not have explicit word representations; do they acquire
	implicit ones? We assess the lexical capacity of a network using the lexical
	decision task common in psycholinguistics: the system is required to decide
	whether or not a string of characters forms a word. We explore how accuracy on
	this task is affected by the architecture of the network, focusing on cell type
	(LSTM vs. SRN), depth and width. We also compare these architectural properties
	to a simple count of the parameters of the network. The overall number of
	parameters in the network turns out to be the most important predictor of
	accuracy; in particular, there is little evidence that deeper networks are
	beneficial for this task.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>legodais-linzen-dupoux:2017:EACLshort</bibkey>
  </paper>

  <paper id="2021">
    <title>Optimal encoding! - Information Theory constrains article omission in newspaper headlines</title>
    <author><first>Robin</first><last>Lemke</last></author>
    <author><first>Eva</first><last>Horch</last></author>
    <author><first>Ingo</first><last>Reich</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>131&#8211;135</pages>
    <url>http://www.aclweb.org/anthology/E17-2021</url>
    <abstract>In this paper we pursue the hypothesis that the distribution of article
	omission specifically is constrained by principles of Information Theory
	(Shannon 1948). In particular, Information Theory predicts a stronger
	preference for article omission before nouns which are relatively unpredictable
	in context of the
	preceding words. We investigated article omission in German newspaper headlines
	with a corpus and acceptability rating study. Both support our hypothesis:
	Articles are inserted more often before unpredictable nouns and subjects
	perceive
	article omission before predictable nouns as more well-formed than before
	unpredictable ones. This suggests that information theoretic principles
	constrain the distribution of article omission in headlines.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>lemke-horch-reich:2017:EACLshort</bibkey>
  </paper>

  <paper id="2022">
    <title>A Computational Analysis of the Language of Drug Addiction</title>
    <author><first>Carlo</first><last>Strapparava</last></author>
    <author><first>Rada</first><last>Mihalcea</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>136&#8211;142</pages>
    <url>http://www.aclweb.org/anthology/E17-2022</url>
    <abstract>We present a computational analysis of the language of drug users when talking
	about their drug experiences. We introduce a new dataset of over 4,000
	descriptions of experiences reported by users of four main drug types, and show
	that we can predict with an F1-score of up to 88% the drug behind a certain
	experience. We also perform an analysis of the dominant psycholinguistic
	processes and dominant emotions associated with each drug type, which sheds 
	light on the characteristics of drug users.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>strapparava-mihalcea:2017:EACLshort</bibkey>
  </paper>

  <paper id="2023">
    <title>A Practical Perspective on Latent Structured Prediction for Coreference Resolution</title>
    <author><first>Iryna</first><last>Haponchyk</last></author>
    <author><first>Alessandro</first><last>Moschitti</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>143&#8211;149</pages>
    <url>http://www.aclweb.org/anthology/E17-2023</url>
    <abstract>Latent structured prediction theory proposes powerful methods such as Latent
	SVMstruct (LSSVM), which can potentially be very appealing for coreference
	resolution (CR). In contrast, only small work is available, mainly targeting
	the latent structured perceptron (LSP).
	In this paper, we carried out a practical study comparing for the first time
	online learning with LSSVM. We analyze the intricacies that may have made
	initial attempts to use LSSVM fail, i.e., a huge training time and much lower
	accuracy produced by Kruskal's spanning tree algorithm. In this respect, we
	also propose a new effective feature selection approach for improving system
	efficiency. The results show that LSP, if correctly parameterized, produces the
	same performance as LSSVM, being much more efficient.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>haponchyk-moschitti:2017:EACLshort</bibkey>
  </paper>

  <paper id="2024">
    <title>Do We Need Cross Validation for Discourse Relation Classification?</title>
    <author><first>Wei</first><last>Shi</last></author>
    <author><first>Vera</first><last>Demberg</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>150&#8211;156</pages>
    <url>http://www.aclweb.org/anthology/E17-2024</url>
    <abstract>The task of implicit discourse relation classification has received increased
	attention in recent years, including two CoNLL shared tasks on the topic.
	Existing machine learning models for the task train on sections 2-21 of the
	PDTB and test on section 23, which includes a total of 761 implicit discourse
	relations. In this paper, we'd like to make a methodological point, arguing
	that the standard test set is too small to draw conclusions about whether the
	inclusion of certain features constitute a genuine improvement, or whether one
	got lucky with some properties of the test set, and argue for the adoption of
	cross validation for the discourse relation classification task by the
	community.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>shi-demberg:2017:EACLshort</bibkey>
  </paper>

  <paper id="2025">
    <title>Using the Output Embedding to Improve Language Models</title>
    <author><first>Ofir</first><last>Press</last></author>
    <author><first>Lior</first><last>Wolf</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>157&#8211;163</pages>
    <url>http://www.aclweb.org/anthology/E17-2025</url>
    <abstract>We study the topmost weight matrix of neural network language models. We show
	that this matrix constitutes a valid word embedding. When training language
	models, we recommend tying the input embedding and this output embedding. We
	analyze the resulting update rules and show that the tied embedding evolves in
	a more similar way to the output embedding than to the input embedding in the
	untied model. We also offer a new method of regularizing the output embedding.
	Our methods lead to a significant reduction in perplexity, as we are able to
	show on a variety of neural network language models. Finally, we show that
	weight tying can reduce the size of neural translation models to less than half
	of their original size without harming their performance.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>press-wolf:2017:EACLshort</bibkey>
  </paper>

  <paper id="2026">
    <title>Identifying beneficial task relations for multi-task learning in deep neural networks</title>
    <author><first>Joachim</first><last>Bingel</last></author>
    <author><first>Anders</first><last>S&#248;gaard</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>164&#8211;169</pages>
    <url>http://www.aclweb.org/anthology/E17-2026</url>
    <abstract>Multi-task learning (MTL) in deep neural networks for NLP has recently received
	increasing interest due to some compelling benefits, including its potential to
	efficiently regularize models and to reduce the need for labeled data. While it
	has brought significant improvements in a number of NLP tasks, mixed results
	have been reported, and little is known about the conditions under which MTL
	leads to gains in NLP. This paper sheds light on the specific task relations
	that can lead to gains from MTL models over single-task setups.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>bingel-sogaard:2017:EACLshort</bibkey>
  </paper>

  <paper id="2027">
    <title>Effective search space reduction for spell correction using character neural embeddings</title>
    <author><first>Harshit</first><last>Pande</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>170&#8211;174</pages>
    <url>http://www.aclweb.org/anthology/E17-2027</url>
    <abstract>We present a novel, unsupervised, and distance measure agnostic method for
	search
	space reduction in spell correction using neural character embeddings. The
	embeddings are learned by skip-gram word2vec training on sequences generated from
	dictionary words in a phonetic information-retentive manner. We report a very
	high performance in terms of both success rates and reduction of search space
	on the Birkbeck spelling error corpus. To the best of our knowledge, this is
	the first application of word2vec to spell correction.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>pande:2017:EACLshort</bibkey>
  </paper>

  <paper id="2028">
    <title>Explaining and Generalizing Skip-Gram through Exponential Family Principal Component Analysis</title>
    <author><first>Ryan</first><last>Cotterell</last></author>
    <author><first>Adam</first><last>Poliak</last></author>
    <author><first>Benjamin</first><last>Van Durme</last></author>
    <author><first>Jason</first><last>Eisner</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>175&#8211;181</pages>
    <url>http://www.aclweb.org/anthology/E17-2028</url>
    <abstract>The popular skip-gram model induces word embeddings by exploiting the signal
	from word-context coocurrence. We offer a new interpretation of skip-gram based
	on exponential family PCA-a form of matrix factorization to generalize the
	skip-gram model to tensor factorization. In turn, this lets us train embeddings
	through richer higher-order coocurrences, e.g., triples that include positional
	information (to incorporate syntax) or morphological information (to share
	parameters across related words). We experiment on 40 languages and show our
	model improves upon skip-gram.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>cotterell-EtAl:2017:EACLshort</bibkey>
  </paper>

  <paper id="2029">
    <title>Latent Variable Dialogue Models and their Diversity</title>
    <author><first>Kris</first><last>Cao</last></author>
    <author><first>Stephen</first><last>Clark</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>182&#8211;187</pages>
    <url>http://www.aclweb.org/anthology/E17-2029</url>
    <abstract>We present a dialogue generation model that directly captures the variability
	in possible responses to a given input, which reduces the 'boring output' issue
	of deterministic dialogue models. Experiments show that our model generates
	more diverse outputs than baseline models, and also generates more consistently
	 acceptable output than sampling from a deterministic encoder-decoder model.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>cao-clark:2017:EACLshort</bibkey>
  </paper>

  <paper id="2030">
    <title>Age Group Classification with Speech and Metadata Multimodality Fusion</title>
    <author><first>Denys</first><last>Katerenchuk</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>188&#8211;193</pages>
    <url>http://www.aclweb.org/anthology/E17-2030</url>
    <abstract>Children comprise a significant proportion
	of TV viewers and it is worthwhile to customize
	the experience for them. However,
	identifying who is a child in the audience
	can be a challenging task. We present initial
	studies of a novel method which combines
	utterances with user metadata. In
	particular, we develop an ensemble of different
	machine learning techniques on different
	subsets of data to improve child detection.
	Our initial results show a 9.2%
	absolute improvement over the baseline,
	leading to a state-of-the-art performance.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>katerenchuk:2017:EACLshort</bibkey>
  </paper>

  <paper id="2031">
    <title>Automatically augmenting an emotion dataset improves classification using audio</title>
    <author><first>Egor</first><last>Lakomkin</last></author>
    <author><first>Cornelius</first><last>Weber</last></author>
    <author><first>Stefan</first><last>Wermter</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>194&#8211;197</pages>
    <url>http://www.aclweb.org/anthology/E17-2031</url>
    <abstract>In this work, we tackle a problem of speech emotion classification. One of the
	issues in the area of affective computation is that the amount of annotated
	data is very limited. On the other hand, the number of ways that the same
	emotion can be expressed verbally is enormous due to variability between
	speakers. This is one of the factors that limits performance and
	generalization. We propose a simple method that extracts audio samples from
	movies using textual sentiment analysis. As a result, it is possible to
	automatically construct a larger dataset of audio samples with positive,
	negative emotional and neutral speech. We show that pretraining recurrent
	neural network on such a dataset yields better results on the challenging
	EmotiW corpus. This experiment shows a potential benefit of combining textual
	sentiment analysis with vocal information.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>lakomkin-weber-wermter:2017:EACLshort</bibkey>
  </paper>

  <paper id="2032">
    <title>On-line Dialogue Policy Learning with Companion Teaching</title>
    <author><first>Lu</first><last>Chen</last></author>
    <author><first>Runzhe</first><last>Yang</last></author>
    <author><first>Cheng</first><last>Chang</last></author>
    <author><first>Zihao</first><last>Ye</last></author>
    <author><first>Xiang</first><last>Zhou</last></author>
    <author><first>Kai</first><last>Yu</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>198&#8211;204</pages>
    <url>http://www.aclweb.org/anthology/E17-2032</url>
    <abstract>On-line dialogue policy learning is the key for building evolvable
	conversational agent in real world scenarios. Poor initial policy can easily
	lead to bad user experience and consequently fail to attract sufficient users
	for policy training. A novel framework,  companion teaching, is proposed to
	include a human teacher in the dialogue policy training loop to address the
	cold start problem. Here, dialogue policy is trained using not only user's
	reward, but also teacher's example action as well as estimated immediate reward
	at turn level. Simulation experiments showed that, with small number of human
	teaching dialogues, the proposed approach can effectively improve user
	experience at the beginning and smoothly lead to good performance with more
	user interaction data.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>chen-EtAl:2017:EACLshort</bibkey>
  </paper>

  <paper id="2033">
    <title>Hybrid Dialog State Tracker with ASR Features</title>
    <author><first>Miroslav</first><last>Vodol&#225;n</last></author>
    <author><first>Rudolf</first><last>Kadlec</last></author>
    <author><first>Jan</first><last>Kleindienst</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>205&#8211;210</pages>
    <url>http://www.aclweb.org/anthology/E17-2033</url>
    <abstract>This paper presents a hybrid dialog state tracker enhanced by trainable Spoken
	Language Understanding (SLU) for slot-filling dialog systems. Our architecture
	is inspired by previously proposed neural-network-based belief-tracking
	systems. In addition, we extended some parts of our modular architecture with
	differentiable rules to allow end-to-end training. We hypothesize that these
	rules allow our tracker to generalize better than pure machine-learning based
	systems. For evaluation, we used the Dialog State Tracking Challenge (DSTC) 2
	dataset - a popular belief tracking testbed with dialogs from restaurant
	information system. To our knowledge, our hybrid tracker sets a new
	state-of-the-art result in three out of four categories within the DSTC2.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>vodolan-kadlec-kleindienst:2017:EACLshort</bibkey>
  </paper>

  <paper id="2034">
    <title>Morphological Analysis without Expert Annotation</title>
    <author><first>Garrett</first><last>Nicolai</last></author>
    <author><first>Grzegorz</first><last>Kondrak</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>211&#8211;216</pages>
    <url>http://www.aclweb.org/anthology/E17-2034</url>
    <abstract>The task of morphological analysis is to produce a complete list of lemma+tag
	analyses for a given word-form. We propose a discriminative string transduction
	approach which exploits plain inflection tables and raw text corpora, thus
	obviating the need for expert annotation. Experiments
	on four languages demonstrate that our system has much higher coverage
	than a hand-engineered FST analyzer, and is more accurate than a
	state-of-the-art morphological tagger.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>nicolai-kondrak:2017:EACLshort</bibkey>
  </paper>

  <paper id="2035">
    <title>Morphological Analysis of the Dravidian Language Family</title>
    <author><first>Arun</first><last>Kumar</last></author>
    <author><first>Ryan</first><last>Cotterell</last></author>
    <author><first>Llu&#237;s</first><last>Padr&#243;</last></author>
    <author><first>Antoni</first><last>Oliver</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>217&#8211;222</pages>
    <url>http://www.aclweb.org/anthology/E17-2035</url>
    <abstract>The Dravidian languages are one of the
	most widely spoken language families in
	the world, yet there are very few annotated
	resources available to NLP researchers. To
	remedy this, we create DravMorph, a corpus annotated for
	morphological segmentation and part-of-speech. Additionally, we exploit novel
	features and higher-order models to set state-of-the-art results on these
	corpora on both tasks, beating techniques proposed in the literature by as much
	as 4 points in segmentation F1.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>kumar-EtAl:2017:EACLshort</bibkey>
  </paper>

  <paper id="2036">
    <title>BabelDomains: Large-Scale Domain Labeling of Lexical Resources</title>
    <author><first>Jose</first><last>Camacho-Collados</last></author>
    <author><first>Roberto</first><last>Navigli</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>223&#8211;228</pages>
    <url>http://www.aclweb.org/anthology/E17-2036</url>
    <abstract>In this paper we present BabelDomains, a unified resource which provides
	lexical items with information about domains of knowledge. We propose an
	automatic method that uses knowledge from various lexical resources, exploiting
	both distributional and graph-based clues, to accurately propagate domain
	information. We evaluate our methodology intrinsically on two lexical resources
	(WordNet and BabelNet), achieving a precision over 80% in both cases. Finally,
	we show the potential of BabelDomains in a supervised learning setting,
	clustering training data by domain for hypernym discovery.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>camachocollados-navigli:2017:EACLshort</bibkey>
  </paper>

  <paper id="2037">
    <title>JFLEG: A Fluency Corpus and Benchmark for Grammatical Error Correction</title>
    <author><first>Courtney</first><last>Napoles</last></author>
    <author><first>Keisuke</first><last>Sakaguchi</last></author>
    <author><first>Joel</first><last>Tetreault</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>229&#8211;234</pages>
    <url>http://www.aclweb.org/anthology/E17-2037</url>
    <abstract>We present a new parallel corpus, JHU FLuency-Extended GUG corpus (JFLEG) for
	developing and evaluating grammatical error correction (GEC). Unlike other
	corpora, it represents a broad range of language proficiency levels and uses
	holistic fluency edits to not only correct grammatical errors but also make the
	original text more native sounding. We describe the types of corrections made
	and benchmark four leading GEC systems on this corpus, identifying specific
	areas in which they do well and how they can improve. JFLEG fulfills the need
	for a new gold standard to properly assess the current state of GEC.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>napoles-sakaguchi-tetreault:2017:EACLshort</bibkey>
  </paper>

  <paper id="2038">
    <title>A Parallel Corpus for Evaluating Machine Translation between Arabic and European Languages</title>
    <author><first>Nizar</first><last>Habash</last></author>
    <author><first>Nasser</first><last>Zalmout</last></author>
    <author><first>Dima</first><last>Taji</last></author>
    <author><first>Hieu</first><last>Hoang</last></author>
    <author><first>Maverick</first><last>Alzate</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>235&#8211;241</pages>
    <url>http://www.aclweb.org/anthology/E17-2038</url>
    <abstract>We present Arab-Acquis, a large publicly available dataset for evaluating
	machine translation between 22 European languages and Arabic. Arab-Acquis
	consists of over 12,000 sentences from the JRC-Acquis (Acquis Communautaire)
	corpus translated twice by professional translators, once from English and once
	from French, and totaling over 600,000 words.  The corpus follows previous data
	splits in the literature for tuning, development, and testing. We describe the
	corpus and how it was created. We also present the first benchmarking results
	on translating to and from Arabic for 22 European languages.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>habash-EtAl:2017:EACLshort</bibkey>
  </paper>

  <paper id="2039">
    <title>The Parallel Meaning Bank: Towards a Multilingual Corpus of Translations Annotated with Compositional Meaning Representations</title>
    <author><first>Lasha</first><last>Abzianidze</last></author>
    <author><first>Johannes</first><last>Bjerva</last></author>
    <author><first>Kilian</first><last>Evang</last></author>
    <author><first>Hessel</first><last>Haagsma</last></author>
    <author><first>Rik</first><last>van Noord</last></author>
    <author><first>Pierre</first><last>Ludmann</last></author>
    <author><first>Duc-Duy</first><last>Nguyen</last></author>
    <author><first>Johan</first><last>Bos</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>242&#8211;247</pages>
    <url>http://www.aclweb.org/anthology/E17-2039</url>
    <abstract>The Parallel Meaning Bank is a corpus of translations annotated with shared,
	formal meaning representations comprising over 11 million words divided over
	four languages (English, German, Italian, and Dutch). Our approach is based on
	cross-lingual projection: automatically produced (and manually corrected)
	semantic annotations for English sentences are mapped onto their word-aligned
	translations, assuming that the translations are meaning-preserving. The
	semantic annotation consists of five main steps: (i) segmentation of the text
	in sentences and lexical items; (ii) syntactic parsing with Combinatory
	Categorial Grammar; (iii) universal semantic tagging; (iv) symbolization; and
	(v) compositional semantic analysis based on Discourse Representation Theory.
	These steps are performed using statistical models trained in a semi-supervised
	manner. The employed annotation models are all language-neutral. Our first
	results are promising.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>abzianidze-EtAl:2017:EACLshort</bibkey>
  </paper>

  <paper id="2040">
    <title>Cross-lingual tagger evaluation without test data</title>
    <author><first>&#x17D;eljko</first><last>Agi&#x107;</last></author>
    <author><first>Barbara</first><last>Plank</last></author>
    <author><first>Anders</first><last>S&#248;gaard</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>248&#8211;253</pages>
    <url>http://www.aclweb.org/anthology/E17-2040</url>
    <abstract>We address the challenge of cross-lingual POS tagger evaluation in absence of
	manually annotated test data. We put forth and evaluate two dictionary-based
	metrics. On the tasks of accuracy prediction and system ranking, we reveal that
	these metrics are reliable enough to approximate test set-based evaluation, and
	at the same time lean enough to support assessment for truly low-resource
	languages.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>agic-plank-sogaard:2017:EACLshort</bibkey>
  </paper>

  <paper id="2041">
    <title>Legal NERC with ontologies, Wikipedia and curriculum learning</title>
    <author><first>Cristian</first><last>Cardellino</last></author>
    <author><first>Milagro</first><last>Teruel</last></author>
    <author><first>Laura</first><last>Alonso Alemany</last></author>
    <author><first>Serena</first><last>Villata</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>254&#8211;259</pages>
    <url>http://www.aclweb.org/anthology/E17-2041</url>
    <abstract>In this paper, we present a Wikipedia-based approach to develop resources for
	the legal domain. We establish a mapping between a legal domain ontology, LKIF
	(Hoekstra et al. 2007), and a Wikipedia-based ontology, YAGO (Suchanek et al.
	2007), and through that we populate LKIF. Moreover, we use the mentions of
	those entities in Wikipedia text to train a specific Named Entity Recognizer
	and Classifier. We find that this classifier works well in the Wikipedia, but,
	as could be expected, performance decreases in a corpus of judgments of the
	European Court of Human Rights. However, this tool will be used as a preprocess
	for human annotation.
	We resort to a technique called "curriculum learning" aimed to overcome
	problems of overfitting by learning increasingly more complex concepts.
	However, we find that in this particular setting, the method works best by
	learning from most specific to most general concepts, not the other way round.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>cardellino-EtAl:2017:EACLshort</bibkey>
  </paper>

  <paper id="2042">
    <title>The Content Types Dataset: a New Resource to Explore Semantic and Functional Characteristics of Texts</title>
    <author><first>Rachele</first><last>Sprugnoli</last></author>
    <author><first>Tommaso</first><last>Caselli</last></author>
    <author><first>Sara</first><last>Tonelli</last></author>
    <author><first>Giovanni</first><last>Moretti</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>260&#8211;266</pages>
    <url>http://www.aclweb.org/anthology/E17-2042</url>
    <abstract>This paper presents a new resource, called Content Types Dataset, to promote
	the analysis of texts as a composition of units with specific semantic and
	functional roles. By developing this dataset, we also introduce a new NLP task
	for the automatic classification of Content Types. The annotation scheme and
	the dataset are described together with two sets of classification experiments.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>sprugnoli-EtAl:2017:EACLshort</bibkey>
  </paper>

  <paper id="2043">
    <title>Continuous N-gram Representations for Authorship Attribution</title>
    <author><first>Yunita</first><last>Sari</last></author>
    <author><first>Andreas</first><last>Vlachos</last></author>
    <author><first>Mark</first><last>Stevenson</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>267&#8211;273</pages>
    <url>http://www.aclweb.org/anthology/E17-2043</url>
    <abstract>This paper presents work on using continuous representations for authorship
	attribution. In contrast to previous work, which uses discrete feature
	representations, our model learns continuous representations for n-gram
	features via a neural network jointly with the classification layer.
	Experimental results demonstrate that the proposed model outperforms the
	state-of-the-art on two datasets, while producing comparable results on the
	remaining two.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>sari-vlachos-stevenson:2017:EACLshort</bibkey>
  </paper>

  <paper id="2044">
    <title>Reconstructing the house from the ad: Structured prediction on real estate classifieds</title>
    <author><first>Giannis</first><last>Bekoulis</last></author>
    <author><first>Johannes</first><last>Deleu</last></author>
    <author><first>Thomas</first><last>Demeester</last></author>
    <author><first>Chris</first><last>Develder</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>274&#8211;279</pages>
    <url>http://www.aclweb.org/anthology/E17-2044</url>
    <abstract>In this paper, we address the (to the best of our knowledge) new problem of
	extracting a structured description of real estate properties from their
	natural language descriptions in classifieds. We survey and present several
	models to (a) identify important entities of a property (e.g., rooms) from
	classifieds and (b) structure them into a tree format, with the entities as
	nodes and edges representing a part-of relation. Experiments show that a
	graph-based system deriving the tree from an initially fully connected entity
	graph, outperforms a transition-based system starting from only the entity
	nodes, since it better reconstructs the tree.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>bekoulis-EtAl:2017:EACLshort</bibkey>
  </paper>

  <paper id="2045">
    <title>Neural vs. Phrase-Based Machine Translation in a Multi-Domain Scenario</title>
    <author><first>M. Amin</first><last>Farajian</last></author>
    <author><first>Marco</first><last>Turchi</last></author>
    <author><first>Matteo</first><last>Negri</last></author>
    <author><first>Nicola</first><last>Bertoldi</last></author>
    <author><first>Marcello</first><last>Federico</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>280&#8211;284</pages>
    <url>http://www.aclweb.org/anthology/E17-2045</url>
    <abstract>State-of-the-art neural machine translation (NMT) systems are generally trained
	on specific domains by carefully selecting the training sets and applying
	proper domain adaptation techniques. 
	In this paper we consider the real world scenario in which the target domain is
	not predefined, hence the system should be able to translate text from multiple
	domains. We compare the performance of a generic NMT system and phrase-based
	statistical machine translation (PBMT) system by training them on a generic
	parallel corpus composed of data from different domains.
	Our results on multi-domain English-French data show that, in these realistic
	conditions, PBMT outperforms its neural counterpart. This raises the question:
	is NMT ready for deployment as a generic/multi-purpose MT backbone in
	real-world settings?</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>farajian-EtAl:2017:EACLshort</bibkey>
  </paper>

  <paper id="2046">
    <title>Improving ROUGE for Timeline Summarization</title>
    <author><first>Sebastian</first><last>Martschat</last></author>
    <author><first>Katja</first><last>Markert</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>285&#8211;290</pages>
    <url>http://www.aclweb.org/anthology/E17-2046</url>
    <abstract>Current evaluation metrics for timeline summarization either ignore the
	temporal aspect of the task or require strict date matching. We introduce
	variants of ROUGE that allow alignment of daily summaries via temporal distance
	or semantic similarity. We argue for the suitability of these variants in a
	theoretical analysis and demonstrate it in a battery of task-specific tests.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>martschat-markert:2017:EACLshort</bibkey>
  </paper>

  <paper id="2047">
    <title>Cutting-off Redundant Repeating Generations for Neural Abstractive Summarization</title>
    <author><first>Jun</first><last>Suzuki</last></author>
    <author><first>Masaaki</first><last>Nagata</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>291&#8211;297</pages>
    <url>http://www.aclweb.org/anthology/E17-2047</url>
    <abstract>This paper tackles the reduction of redundant repeating generation that is
	often observed in RNN-based encoder-decoder models.
	 Our basic idea is to jointly estimate the upper-bound frequency of each target
	vocabulary in the encoder and control the output words based on the estimation
	in the decoder.
	 Our method shows significant improvement over a strong RNN-based
	encoder-decoder baseline and achieved its best results on an abstractive
	summarization benchmark.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>suzuki-nagata:2017:EACLshort</bibkey>
  </paper>

  <paper id="2048">
    <title>To Sing like a Mockingbird</title>
    <author><first>Lorenzo</first><last>Gatti</last></author>
    <author><first>G&#246;zde</first><last>&#214;zbal</last></author>
    <author><first>Oliviero</first><last>Stock</last></author>
    <author><first>Carlo</first><last>Strapparava</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>298&#8211;304</pages>
    <url>http://www.aclweb.org/anthology/E17-2048</url>
    <abstract>Musical parody, i.e. the act of changing the lyrics of an existing and very
	well-known song, is a commonly used technique for creating catchy advertising
	tunes and for mocking people or events. Here we describe a system for
	automatically producing a musical parody, starting from a corpus of songs. The
	system can automatically identify characterizing words and concepts related to
	a novel text, which are taken from the daily news. These concepts are then used
	as seeds to appropriately replace part of the original lyrics of a song,
	using metrical, rhyming and lexical constraints. Finally, the parody can be
	sung with a singing speech synthesizer, with no intervention from the user.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>gatti-EtAl:2017:EACLshort</bibkey>
  </paper>

  <paper id="2049">
    <title>K-best Iterative Viterbi Parsing</title>
    <author><first>Katsuhiko</first><last>Hayashi</last></author>
    <author><first>Masaaki</first><last>Nagata</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>305&#8211;310</pages>
    <url>http://www.aclweb.org/anthology/E17-2049</url>
    <abstract>This paper presents an efficient and optimal parsing algorithm for
	probabilistic context-free grammars (PCFGs). To achieve faster parsing, our
	proposal employs a pruning technique to reduce unnecessary edges in the search
	space. The key is to conduct repetitively Viterbi inside and outside parsing,
	while gradually expanding the search space to efficiently compute heuristic
	bounds used for pruning. Our experimental results using the English Penn
	Treebank corpus show that the proposed algorithm is faster than the standard
	CKY parsing algorithm. In
	addition, we also show how to extend this algorithm to extract k-best Viterbi
	parse trees.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>hayashi-nagata:2017:EACLshort</bibkey>
  </paper>

  <paper id="2050">
    <title>PP Attachment: Where do We Stand?</title>
    <author><first>Dani&#235;l</first><last>de Kok</last></author>
    <author><first>Jianqiang</first><last>Ma</last></author>
    <author><first>Corina</first><last>Dima</last></author>
    <author><first>Erhard</first><last>Hinrichs</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>311&#8211;317</pages>
    <url>http://www.aclweb.org/anthology/E17-2050</url>
    <abstract>Prepositional phrase (PP) attachment is a well known challenge to parsing. In
	this paper, we combine
	the insights of different works, namely: (1) treating PP attachment as a
	classification task with an arbitrary number of attachment candidates; (2) 
	using auxiliary distributions to augment the data beyond the hand-annotated
	training set; (3)  using topological fields to get information about the
	distribution of PP attachment throughout clauses and (4) using state-of-the-art
	techniques such as word embeddings and neural networks. We show that jointly
	using these techniques leads to substantial improvements. We also conduct a
	qualitative analysis to gauge where the ceiling of the task is in a realistic
	setup.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>dekok-EtAl:2017:EACLshort</bibkey>
  </paper>

  <paper id="2051">
    <title>Don't Stop Me Now! Using Global Dynamic Oracles to Correct Training Biases of Transition-Based Dependency Parsers</title>
    <author><first>Lauriane</first><last>Aufrant</last></author>
    <author><first>Guillaume</first><last>Wisniewski</last></author>
    <author><first>Fran&#231;ois</first><last>Yvon</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>318&#8211;323</pages>
    <url>http://www.aclweb.org/anthology/E17-2051</url>
    <abstract>This paper formalizes a sound extension of dynamic oracles to global training,
	in the frame of transition-based dependency parsers. By dispensing with the
	pre-computation of references, this extension widens the training strategies
	that can be entertained for such parsers; we show this by revisiting two
	standard training procedures, early-update and max-violation, to correct some
	of their search space sampling biases. Experimentally, on the SPMRL treebanks,
	this improvement increases the similarity between the train and test
	distributions and yields performance improvements up to 0.7 UAS, without any
	computation overhead.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>aufrant-wisniewski-yvon:2017:EACLshort</bibkey>
  </paper>

  <paper id="2052">
    <title>Joining Hands: Exploiting Monolingual Treebanks for Parsing of Code-mixing Data</title>
    <author><first>Irshad</first><last>Bhat</last></author>
    <author><first>Riyaz A.</first><last>Bhat</last></author>
    <author><first>Manish</first><last>Shrivastava</last></author>
    <author><first>Dipti</first><last>Sharma</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>324&#8211;330</pages>
    <url>http://www.aclweb.org/anthology/E17-2052</url>
    <abstract>In this paper, we propose efficient and less resource-intensive strategies for
	parsing of code-mixed data. These strategies are not constrained by in-domain
	annotations, rather they leverage pre-existing monolingual annotated resources
	for training. We show that these methods can produce significantly better
	results as compared to an informed baseline. Due to lack of an evaluation set
	for
	code-mixed structures, we also present a data set of 450 Hindi and English
	code-mixed tweets of Hindi multilingual speakers for evaluation.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>bhat-EtAl:2017:EACLshort</bibkey>
  </paper>

  <paper id="2053">
    <title>Multilingual Lexicalized Constituency Parsing with Word-Level Auxiliary Tasks</title>
    <author><first>Maximin</first><last>Coavoux</last></author>
    <author><first>Benoit</first><last>Crabb&#233;</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>331&#8211;336</pages>
    <url>http://www.aclweb.org/anthology/E17-2053</url>
    <abstract>We introduce a constituency parser based on a bi-LSTM encoder
	adapted from recent work (Cross and Huang, 2016b; Kiperwasser and
	Goldberg, 2016), which can incorporate a lower level character bi-LSTM
	(Ballesteros et al., 2015; Plank et al., 2016). We model two important
	interfaces of constituency parsing with auxiliary tasks supervised at
	the word level: (i) part-of-speech (POS) and morphological tagging,
	(ii) functional label prediction. On the SPMRL dataset, our parser
	obtains above state-of-the-art results on constituency parsing without
	requiring either predicted POS or morphological tags, and outputs
	labelled dependency trees.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>coavoux-crabbe:2017:EACLshort</bibkey>
  </paper>

  <paper id="2054">
    <title>Be Precise or Fuzzy: Learning the Meaning of Cardinals and Quantifiers from Vision</title>
    <author><first>Sandro</first><last>Pezzelle</last></author>
    <author><first>Marco</first><last>Marelli</last></author>
    <author><first>Raffaella</first><last>Bernardi</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>337&#8211;342</pages>
    <url>http://www.aclweb.org/anthology/E17-2054</url>
    <abstract>People can refer to quantities in a visual scene by using either exact
	cardinals (e.g. one, two, three) or natural language quantifiers (e.g. few,
	most, all). In humans, these two processes underlie fairly different cognitive
	and neural mechanisms. Inspired by this evidence, the present study proposes
	two models for learning the objective meaning of cardinals and quantifiers from
	visual scenes containing multiple objects. We show that a model capitalizing on
	a 'fuzzy' measure of similarity is effective for learning quantifiers,
	whereas the learning of exact cardinals is better accomplished when information
	about number is provided.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>pezzelle-marelli-bernardi:2017:EACLshort</bibkey>
  </paper>

  <paper id="2055">
    <title>Improving a Strong Neural Parser with Conjunction-Specific Features</title>
    <author><first>Jessica</first><last>Ficler</last></author>
    <author><first>Yoav</first><last>Goldberg</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>343&#8211;348</pages>
    <url>http://www.aclweb.org/anthology/E17-2055</url>
    <abstract>While dependency parsers reach very high overall accuracy, some dependency
	relations are much harder than others. 
	In particular, dependency parsers perform poorly in coordination construction
	(i.e., correctly attaching the conj relation).
	We extend a state-of-the-art dependency parser with conjunction-specific
	features, focusing on the similarity between the conjuncts head words. Training
	the extended parser yields an improvement in conj attachment as well as in
	overall dependency parsing accuracy on the Stanford dependency conversion of
	the Penn TreeBank.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>ficler-goldberg:2017:EACLshort</bibkey>
  </paper>

  <paper id="2056">
    <title>Neural Automatic Post-Editing Using Prior Alignment and Reranking</title>
    <author><first>Santanu</first><last>Pal</last></author>
    <author><first>Sudip Kumar</first><last>Naskar</last></author>
    <author><first>Mihaela</first><last>Vela</last></author>
    <author><first>Qun</first><last>Liu</last></author>
    <author><first>Josef</first><last>van Genabith</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>349&#8211;355</pages>
    <url>http://www.aclweb.org/anthology/E17-2056</url>
    <abstract>We present a second-stage machine translation (MT) system based on a neural
	machine translation (NMT) approach to automatic post-editing (APE) that
	improves the translation quality provided by a first-stage MT system.
	Our APE system (APE_Sym) is an extended version of an attention based NMT model
	with bilingual symmetry employing bidirectional models, mt&#8211;pe and pe&#8211;mt.
	APE translations produced by our system show statistically significant
	improvements over the first-stage MT, phrase-based APE and the best reported
	score on the WMT 2016 APE dataset by a previous neural APE system.
	Re-ranking (APE_Rerank) of the n-best translations from the phrase-based APE
	and APE_Sym systems provides further substantial improvements over the
	symmetric neural APE model.
	Human evaluation confirms that the APE_Rerank generated PE translations improve
	on the previous best neural APE system at WMT 2016.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>pal-EtAl:2017:EACLshort</bibkey>
  </paper>

  <paper id="2057">
    <title>Improving Evaluation of Document-level Machine Translation Quality Estimation</title>
    <author><first>Yvette</first><last>Graham</last></author>
    <author><first>Qingsong</first><last>Ma</last></author>
    <author><first>Timothy</first><last>Baldwin</last></author>
    <author><first>Qun</first><last>Liu</last></author>
    <author><first>Carla</first><last>Parra</last></author>
    <author><first>Carolina</first><last>Scarton</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>356&#8211;361</pages>
    <url>http://www.aclweb.org/anthology/E17-2057</url>
    <abstract>Meaningful conclusions about the relative performance of NLP systems are only
	possible if the gold standard employed in a given evaluation is both valid and
	reliable. In this paper, we explore the validity of human annotations currently
	employed in the evaluation of document-level quality estimation for machine
	translation (MT). We demonstrate the degree to which MT system rankings are
	dependent on weights employed in the construction of the gold standard, before
	proposing direct human assessment as a valid alternative.
	Experiments show direct assessment (DA) scores for documents to be highly
	reliable, achieving a correlation of above 0.9 in a self-replication
	experiment, in addition to a substantial estimated cost reduction through
	quality
	controlled crowd-sourcing. The original gold standard based on post-edits
	incurs
	a 10&#8211;20 times greater cost than DA.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>graham-EtAl:2017:EACLshort</bibkey>
  </paper>

  <paper id="2058">
    <title>Neural Machine Translation by Minimising the Bayes-risk with Respect to Syntactic Translation Lattices</title>
    <author><first>Felix</first><last>Stahlberg</last></author>
    <author><first>Adri&#224;</first><last>de Gispert</last></author>
    <author><first>Eva</first><last>Hasler</last></author>
    <author><first>Bill</first><last>Byrne</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>362&#8211;368</pages>
    <url>http://www.aclweb.org/anthology/E17-2058</url>
    <abstract>We present a novel scheme to combine neural machine translation (NMT) with
	traditional statistical machine translation (SMT). Our approach borrows ideas
	from linearised lattice minimum Bayes-risk decoding for SMT. The NMT score is
	combined with the Bayes-risk of the translation according the SMT lattice. This
	makes our approach much more flexible than n-best list or lattice rescoring
	as the neural decoder is not restricted to the SMT search space. We show an
	efficient and simple way to integrate risk estimation into the NMT decoder
	which is suitable for word-level as well as subword-unit-level NMT. We test our
	method on English-German and Japanese-English and report significant gains over
	lattice rescoring on several data sets for both single and ensembled NMT. The
	MBR decoder produces entirely new hypotheses far beyond simply rescoring the
	SMT search space or fixing UNKs in the NMT output.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>stahlberg-EtAl:2017:EACLshort</bibkey>
  </paper>

  <paper id="2059">
    <title>Producing Unseen Morphological Variants in Statistical Machine Translation</title>
    <author><first>Matthias</first><last>Huck</last></author>
    <author><first>Ale&#x161;</first><last>Tamchyna</last></author>
    <author><first>Ond&#x159;ej</first><last>Bojar</last></author>
    <author><first>Alexander</first><last>Fraser</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>369&#8211;375</pages>
    <url>http://www.aclweb.org/anthology/E17-2059</url>
    <abstract>Translating into morphologically rich languages is difficult. Although the
	coverage of lemmas may be reasonable, many morphological variants cannot be
	learned from the training data. We present a statistical translation system
	that is able to produce these inflected word forms. Different from most
	previous work, we do not separate morphological prediction from lexical choice
	into two consecutive steps. Our approach is novel in that it is integrated in
	decoding and takes advantage of context information from both the source
	language and the target language sides.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>huck-EtAl:2017:EACLshort</bibkey>
  </paper>

  <paper id="2060">
    <title>How Grammatical is Character-level Neural Machine Translation? Assessing MT Quality with Contrastive Translation Pairs</title>
    <author><first>Rico</first><last>Sennrich</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>376&#8211;382</pages>
    <url>http://www.aclweb.org/anthology/E17-2060</url>
    <abstract>Analysing translation quality in regards to specific linguistic phenomena has
	historically been difficult and time-consuming. Neural machine translation has
	the attractive property that it can produce scores for arbitrary translations,
	and we propose a novel method to assess how well NMT systems model specific
	linguistic phenomena such as agreement over long distances, the production of
	novel words, and the faithful translation of polarity. The core idea is that we
	measure whether a reference translation is more probable under a NMT model than
	a contrastive translation which introduces a specific type of error. We present
	LingEval97, a large-scale data set of 97000 contrastive translation pairs based
	on the WMT English->German translation task, with errors automatically created
	with simple rules. We report results for a number of systems, and find that
	recently introduced character-level NMT systems perform better at
	transliteration than models with byte-pair encoding (BPE) segmentation, but
	perform more poorly at morphosyntactic agreement, and translating discontiguous
	units of meaning.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>sennrich:2017:EACLshort</bibkey>
  </paper>

  <paper id="2061">
    <title>Neural Machine Translation with Recurrent Attention Modeling</title>
    <author><first>Zichao</first><last>Yang</last></author>
    <author><first>Zhiting</first><last>Hu</last></author>
    <author><first>Yuntian</first><last>Deng</last></author>
    <author><first>Chris</first><last>Dyer</last></author>
    <author><first>Alex</first><last>Smola</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>383&#8211;387</pages>
    <url>http://www.aclweb.org/anthology/E17-2061</url>
    <abstract>Knowing which words have been attended to in previous time steps while
	generating a translation is a rich source of information for predicting what
	words will be attended to in the future. We improve upon the attention model of
	Bahdanau et al. (2014) by explicitly modeling the relationship between previous
	and subsequent attention levels for each word using one recurrent network per
	input word. This architecture easily captures informative features, such as
	fertility and regularities in relative distortion. In experiments, we show our
	parameterization of attention improves translation quality.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>yang-EtAl:2017:EACLshort1</bibkey>
  </paper>

  <paper id="2062">
    <title>Inducing Embeddings for Rare and Unseen Words by Leveraging Lexical Resources</title>
    <author><first>Mohammad Taher</first><last>Pilehvar</last></author>
    <author><first>Nigel</first><last>Collier</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>388&#8211;393</pages>
    <url>http://www.aclweb.org/anthology/E17-2062</url>
    <abstract>We put forward an approach that exploits the knowledge encoded in lexical
	resources in order to induce representations for words that were not
	encountered frequently during training. Our approach provides an advantage over
	the past work in that it enables vocabulary expansion not only for
	morphological variations, but also for infrequent domain specific terms. We
	performed evaluations in different settings, showing that the technique can
	provide consistent improvements on multiple benchmarks across domains.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>pilehvar-collier:2017:EACLshort</bibkey>
  </paper>

  <paper id="2063">
    <title>Large-scale evaluation of dependency-based DSMs: Are they worth the effort?</title>
    <author><first>Gabriella</first><last>Lapesa</last></author>
    <author><first>Stefan</first><last>Evert</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>394&#8211;400</pages>
    <url>http://www.aclweb.org/anthology/E17-2063</url>
    <abstract>This paper presents a large-scale evaluation study of dependency-based
	distributional semantic models. We evaluate dependency-filtered and
	dependency-structured DSMs in a number of standard semantic similarity tasks,
	systematically exploring their parameter space in order to give them a "fair
	shot" against window-based models.  Our results show that properly tuned
	window-based DSMs still outperform the dependency-based models in most tasks. 
	There appears to be little need for the language-dependent resources and
	computational cost associated with syntactic analysis.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>lapesa-evert:2017:EACLshort</bibkey>
  </paper>

  <paper id="2064">
    <title>How Well Can We Predict Hypernyms from Word Embeddings? A Dataset-Centric Analysis</title>
    <author><first>Ivan</first><last>Sanchez</last></author>
    <author><first>Sebastian</first><last>Riedel</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>401&#8211;407</pages>
    <url>http://www.aclweb.org/anthology/E17-2064</url>
    <abstract>One key property of word embeddings currently under study is their capacity to
	encode hypernymy. Previous works have used supervised models to recover
	hypernymy structures from embeddings. However, the overall results do not
	clearly show how well we can recover such structures. We conduct the first
	dataset-centric analysis that shows how only the Baroni dataset provides
	consistent results. We empirically show that a possible reason for its good
	performance is its alignment to dimensions specific of hypernymy: generality
	and similarity.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>sanchez-riedel:2017:EACLshort</bibkey>
  </paper>

  <paper id="2065">
    <title>Cross-Lingual Syntactically Informed Distributed Word Representations</title>
    <author><first>Ivan</first><last>Vuli&#x107;</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>408&#8211;414</pages>
    <url>http://www.aclweb.org/anthology/E17-2065</url>
    <abstract>We develop a novel cross-lingual word representation model which injects
	syntactic information through dependency-based contexts into a shared
	cross-lingual word vector space. The model, termed CL-DepEmb, is based on the
	following assumptions: (1) dependency relations are largely
	language-independent, at least for related languages and prominent dependency
	links such as direct objects, as evidenced by the Universal Dependencies
	project; (2) word translation equivalents take similar grammatical roles in a
	sentence and are therefore substitutable within their syntactic contexts.
	Experiments with several language pairs on word similarity and bilingual
	lexicon induction, two fundamental semantic tasks emphasising semantic
	similarity, suggest the usefulness of the proposed syntactically informed
	cross-lingual word vector spaces. Improvements are observed in both tasks over
	standard cross-lingual "offline mapping" baselines trained using the same setup
	and an equal level of bilingual supervision.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>vulic:2017:EACLshort</bibkey>
  </paper>

  <paper id="2066">
    <title>Using Word Embedding for Cross-Language Plagiarism Detection</title>
    <author><first>J&#233;r&#233;my</first><last>Ferrero</last></author>
    <author><first>Laurent</first><last>Besacier</last></author>
    <author><first>Didier</first><last>Schwab</last></author>
    <author><first>Fr&#233;d&#233;ric</first><last>Agn&#232;s</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>415&#8211;421</pages>
    <url>http://www.aclweb.org/anthology/E17-2066</url>
    <abstract>This paper proposes to use distributed representation of words (word
	embeddings) in cross-language textual similarity detection. The main
	contributions of this paper are the following: (a) we introduce new
	cross-language similarity detection methods based on distributed representation
	of words; (b) we combine the different methods proposed to verify their
	complementarity and finally obtain an overall F1 score of 89.15% for
	English-French similarity detection at chunk level (88.5% at sentence level) on
	a very challenging corpus.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>ferrero-EtAl:2017:EACLshort</bibkey>
  </paper>

  <paper id="2067">
    <title>The Interplay of Semantics and Morphology in Word Embeddings</title>
    <author><first>Oded</first><last>Avraham</last></author>
    <author><first>Yoav</first><last>Goldberg</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>422&#8211;426</pages>
    <url>http://www.aclweb.org/anthology/E17-2067</url>
    <abstract>We explore the ability of word embeddings
	to capture both semantic and morphological
	similarity, as affected by the
	different types of linguistic properties
	(surface form, lemma, morphological tag)
	used to compose the representation of each
	word. We train several models, where
	each uses a different subset of these properties
	to compose its representations. By
	evaluating the models on semantic and
	morphological measures, we reveal some
	useful insights on the relationship between
	semantics and morphology.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>avraham-goldberg:2017:EACLshort</bibkey>
  </paper>

  <paper id="2068">
    <title>Bag of Tricks for Efficient Text Classification</title>
    <author><first>Armand</first><last>Joulin</last></author>
    <author><first>Edouard</first><last>Grave</last></author>
    <author><first>Piotr</first><last>Bojanowski</last></author>
    <author><first>Tomas</first><last>Mikolov</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>427&#8211;431</pages>
    <url>http://www.aclweb.org/anthology/E17-2068</url>
    <abstract>This paper explores a simple and efficient baseline for text classification.
	Our experiments show that our fast text classifier fastText is often on par
	with deep learning classifiers in terms of accuracy, and many orders of
	magnitude faster for training and evaluation.  We can train fastText on more
	than one billion words in less than ten minutes using a standard multicore~CPU,
	and classify half a million sentences among~312K classes in less than a minute.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>joulin-EtAl:2017:EACLshort</bibkey>
  </paper>

  <paper id="2069">
    <title>Pulling Out the Stops: Rethinking Stopword Removal for Topic Models</title>
    <author><first>Alexandra</first><last>Schofield</last></author>
    <author><first>M&#229;ns</first><last>Magnusson</last></author>
    <author><first>David</first><last>Mimno</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>432&#8211;436</pages>
    <url>http://www.aclweb.org/anthology/E17-2069</url>
    <abstract>It is often assumed that topic models benefit from the use of a manually
	curated stopword list. Constructing this list is time-consuming and often
	subject to user judgments about what kinds of words are important to the model
	and the application. Although stopword removal clearly affects which word types
	appear as most probable terms in topics, we argue that this improvement is
	superficial, and that topic inference benefits little from the practice of
	removing stopwords beyond very frequent terms. Removing corpus-specific
	stopwords after model inference is more transparent and produces similar
	results to removing those words prior to inference.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>schofield-magnusson-mimno:2017:EACLshort</bibkey>
  </paper>

  <paper id="2070">
    <title>Measuring Topic Coherence through Optimal Word Buckets</title>
    <author><first>Nitin</first><last>Ramrakhiyani</last></author>
    <author><first>Sachin</first><last>Pawar</last></author>
    <author><first>Swapnil</first><last>Hingmire</last></author>
    <author><first>Girish</first><last>Palshikar</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>437&#8211;442</pages>
    <url>http://www.aclweb.org/anthology/E17-2070</url>
    <abstract>Measuring topic quality is essential for scoring the learned topics and their
	subsequent use in Information Retrieval and Text classification. To measure
	quality of Latent Dirichlet Allocation (LDA) based topics learned from text, we
	propose a novel approach based on grouping of topic words into buckets
	(TBuckets). A single large bucket signifies a single coherent theme, in turn
	indicating high topic coherence. TBuckets uses word embeddings of topic words
	and employs singular value decomposition (SVD) and Integer Linear Programming
	based optimization to create coherent word buckets. TBuckets outperforms the
	state-of-the-art techniques when evaluated using 3 publicly available datasets
	and on another one proposed in this paper.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>ramrakhiyani-EtAl:2017:EACLshort</bibkey>
  </paper>

  <paper id="2071">
    <title>A Hybrid CNN-RNN Alignment Model for Phrase-Aware Sentence Classification</title>
    <author><first>Shiou Tian</first><last>Hsu</last></author>
    <author><first>Changsung</first><last>Moon</last></author>
    <author><first>Paul</first><last>Jones</last></author>
    <author><first>Nagiza</first><last>Samatova</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>443&#8211;449</pages>
    <url>http://www.aclweb.org/anthology/E17-2071</url>
    <abstract>The success of sentence classification often depends on understanding both the
	syntactic and semantic properties of word-phrases. Recent progress on this task
	has been based on exploiting the grammatical structure of sentences but often
	this structure is difficult to parse and noisy. In this paper, we propose a
	structure-independent 'Gated Representation Alignment' (GRA) model that blends
	a phrase-focused Convolutional Neural Network (CNN) approach with
	sequence-oriented Recurrent Neural Network (RNN). Our novel alignment mechanism
	allows the RNN to selectively include phrase information in a word-by-word
	sentence representation, and to do this without awareness of the syntactic
	structure. An empirical evaluation of GRA shows higher prediction accuracy (up
	to 4.6%) of fine-grained sentiment ratings, when compared to other
	structure-independent baselines. We also show comparable results to several
	structure-dependent methods. Finally, we analyzed the effect of our alignment
	mechanism and found that this is critical to the effectiveness of the CNN-RNN
	hybrid.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>hsu-EtAl:2017:EACLshort</bibkey>
  </paper>

  <paper id="2072">
    <title>Multivariate Gaussian Document Representation from Word Embeddings for Text Categorization</title>
    <author><first>Giannis</first><last>Nikolentzos</last></author>
    <author><first>Polykarpos</first><last>Meladianos</last></author>
    <author><first>Francois</first><last>Rousseau</last></author>
    <author><first>Yannis</first><last>Stavrakas</last></author>
    <author><first>Michalis</first><last>Vazirgiannis</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>450&#8211;455</pages>
    <url>http://www.aclweb.org/anthology/E17-2072</url>
    <abstract>Recently, there has been a lot of activity in learning distributed
	representations of words in vector spaces. Although there are models capable of
	learning high-quality distributed representations of words, how to generate
	vector representations of the same quality for phrases or documents still
	remains a challenge. In this paper, we propose to model each document as a
	multivariate Gaussian distribution based on the distributed representations of
	its words. We then measure the similarity between two documents based on the
	similarity of their distributions. Experiments on eight standard text
	categorization datasets demonstrate the effectiveness of the proposed approach
	in comparison with state-of-the-art methods.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>nikolentzos-EtAl:2017:EACLshort</bibkey>
  </paper>

  <paper id="2073">
    <title>Derivation of Document Vectors from Adaptation of LSTM Language Model</title>
    <author><first>Wei</first><last>Li</last></author>
    <author><first>Brian</first><last>Mak</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>456&#8211;461</pages>
    <url>http://www.aclweb.org/anthology/E17-2073</url>
    <abstract>In many natural language processing (NLP) tasks, a document is commonly modeled
	as a bag of words using the term frequency-inverse document frequency (TF-IDF)
	vector. One major shortcoming of the frequency-based TF-IDF feature vector is
	that it ignores word orders that carry syntactic and semantic relationships
	among the words in a document. This paper proposes a novel distributed vector
	representation of a document, which will be labeled as DV-LSTM, and is derived
	from the result of adapting a long short-term memory recurrent neural network
	language model by the document. DV-LSTM is expected to capture some high-level
	sequential information in the document, which other current document
	representations fail to do. It was evaluated in document genre classification
	in the Brown Corpus and the BNC Baby Corpus. The results show that DV-LSTM
	significantly outperforms TF-IDF vector and paragraph vector (PV-DM) in most
	cases, and their combinations may further improve the classification
	performance.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>li-mak:2017:EACLshort</bibkey>
  </paper>

  <paper id="2074">
    <title>Real-Time Keyword Extraction from Conversations</title>
    <author><first>Polykarpos</first><last>Meladianos</last></author>
    <author><first>Antoine</first><last>Tixier</last></author>
    <author><first>Ioannis</first><last>Nikolentzos</last></author>
    <author><first>Michalis</first><last>Vazirgiannis</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>462&#8211;467</pages>
    <url>http://www.aclweb.org/anthology/E17-2074</url>
    <abstract>We introduce a novel method to extract keywords from meeting speech in
	real-time. Our approach builds on the graph-of-words representation of text and
	leverages the k-core decomposition algorithm and properties of submodular
	functions. We outperform multiple baselines in a real-time scenario emulated
	from the AMI and ICSI meeting corpora. Evaluation is conducted against both
	extractive and abstractive gold standard using two standard performance metrics
	and a newer one based on word embeddings.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>meladianos-EtAl:2017:EACLshort</bibkey>
  </paper>

  <paper id="2075">
    <title>A Copy-Augmented Sequence-to-Sequence Architecture Gives Good Performance on Task-Oriented Dialogue</title>
    <author><first>Mihail</first><last>Eric</last></author>
    <author><first>Christopher</first><last>Manning</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>468&#8211;473</pages>
    <url>http://www.aclweb.org/anthology/E17-2075</url>
    <abstract>Task-oriented dialogue focuses on conversational agents that participate in
	dialogues with user goals on domain-specific topics. In contrast to chatbots,
	which simply seek to sustain open-ended meaningful discourse, existing
	task-oriented agents usually explicitly model user intent and belief states.
	This paper examines bypassing such an explicit representation by depending on a
	latent neural embedding of state and learning selective attention to dialogue
	history together with copying to  incorporate relevant prior context. We
	complement recent work by showing the effectiveness of simple
	sequence-to-sequence neural architectures with a copy mechanism. Our model
	outperforms more complex memory-augmented models by 7% in per-response
	generation and is on par with the current state-of-the-art on DSTC2, a
	real-world task-oriented dialogue dataset.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>eric-manning:2017:EACLshort</bibkey>
  </paper>

  <paper id="2076">
    <title>Towards speech-to-text translation without speech recognition</title>
    <author><first>Sameer</first><last>Bansal</last></author>
    <author><first>Herman</first><last>Kamper</last></author>
    <author><first>Adam</first><last>Lopez</last></author>
    <author><first>Sharon</first><last>Goldwater</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>474&#8211;479</pages>
    <url>http://www.aclweb.org/anthology/E17-2076</url>
    <abstract>We explore the problem of translating speech to text in low-resource scenarios
	where neither automatic speech recognition (ASR) nor machine translation (MT)
	are available, but we have training data in the form of audio paired with text
	translations. We present the first system for this problem applied to a
	realistic multi-speaker dataset, the CALLHOME Spanish-English speech
	translation corpus. Our approach uses unsupervised term discovery (UTD) to
	cluster repeated patterns in the audio, creating a pseudotext, which we pair
	with translations to create a parallel text and train a simple bag-of-words MT
	model. We identify the challenges faced by the system, finding that the
	difficulty of cross-speaker UTD results in low recall, but that our system is
	still able to correctly translate some content words in test data.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>bansal-EtAl:2017:EACLshort</bibkey>
  </paper>

  <paper id="2077">
    <title>Evaluating Persuasion Strategies and Deep Reinforcement Learning methods for Negotiation Dialogue agents</title>
    <author><first>Simon</first><last>Keizer</last></author>
    <author><first>Markus</first><last>Guhe</last></author>
    <author><first>Heriberto</first><last>Cuayahuitl</last></author>
    <author><first>Ioannis</first><last>Efstathiou</last></author>
    <author><first>Klaus-Peter</first><last>Engelbrecht</last></author>
    <author><first>Mihai</first><last>Dobre</last></author>
    <author><first>Alex</first><last>Lascarides</last></author>
    <author><first>Oliver</first><last>Lemon</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>480&#8211;484</pages>
    <url>http://www.aclweb.org/anthology/E17-2077</url>
    <abstract>In this paper we present a comparative evaluation of various negotiation 
	strategies within an online version of the game &#x201c;Settlers of Catan&#x201d;.
	The comparison is based on human subjects playing games against   
	artificial game-playing agents ('bots') which implement different negotiation
	dialogue strategies, using a chat dialogue interface to negotiate trades. Our
	results suggest that a negotiation strategy that uses persuasion, 
	as well as a strategy that is trained from data using Deep Reinforcement 
	Learning, both lead to an improved win rate against humans, compared to 
	previous rule-based and supervised learning baseline dialogue negotiators.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>keizer-EtAl:2017:EACLshort</bibkey>
  </paper>

  <paper id="2078">
    <title>Unsupervised Dialogue Act Induction using Gaussian Mixtures</title>
    <author><first>Tom&#225;&#x161;</first><last>Brychc&#237;n</last></author>
    <author><first>Pavel</first><last>Kr&#225;l</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>485&#8211;490</pages>
    <url>http://www.aclweb.org/anthology/E17-2078</url>
    <abstract>This paper introduces a new unsupervised approach for dialogue act induction.
	Given the sequence of dialogue utterances, the task is to assign them the
	labels representing their function in the dialogue.
	Utterances are represented as real-valued vectors encoding their meaning. We
	model the dialogue as Hidden Markov model with emission probabilities estimated
	by Gaussian mixtures. We use Gibbs sampling for posterior inference.
	We present the results on the standard Switchboard-DAMSL corpus. Our algorithm
	achieves promising results compared with strong supervised baselines and
	outperforms other unsupervised algorithms.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>brychcin-kral:2017:EACLshort</bibkey>
  </paper>

  <paper id="2079">
    <title>Grounding Language by Continuous Observation of Instruction Following</title>
    <author><first>Ting</first><last>Han</last></author>
    <author><first>David</first><last>Schlangen</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>491&#8211;496</pages>
    <url>http://www.aclweb.org/anthology/E17-2079</url>
    <abstract>Grounded semantics is typically learnt from utterance-level meaning
	representations (e.g., successful database retrievals, denoted objects in
	images, moves in a game). We explore learning word and utterance meanings by
	continuous observation of the actions of an instruction follower (IF). While an
	instruction giver (IG) provided a verbal description of a configuration of
	objects, IF recreated it using a GUI. Aligning these GUI actions to
	sub-utterance chunks allows a simple maximum entropy model to associate them as
	chunk meaning better than just providing it with the utterance-final
	configuration. This shows that semantics useful for incremental (word-by-word)
	application, as required in natural dialogue, might also be better acquired
	from incremental settings.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>han-schlangen:2017:EACLshort</bibkey>
  </paper>

  <paper id="2080">
    <title>Mapping the Perfect via Translation Mining</title>
    <author><first>Martijn</first><last>van der Klis</last></author>
    <author><first>Bert</first><last>Le Bruyn</last></author>
    <author><first>Henri&#235;tte</first><last>de Swart</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>497&#8211;502</pages>
    <url>http://www.aclweb.org/anthology/E17-2080</url>
    <abstract>Semantic analyses of the Perfect often defeat their own purpose: by restricting
	their attention to 'real' perfects (like the English one), they implicitly
	assume the Perfect has predefined meanings and usages. We turn the tables and
	focus on form, using data extracted from multilingual parallel corpora to
	automatically generate semantic maps (Haspelmath, 1997) of the sequence
	'Have/Be + past participle' in five European languages (German, English,
	Spanish, French, Dutch). This technique, which we dub Translation Mining, has
	been applied before in the lexical domain (Walchli and Cysouw, 2012) but we
	showcase its application at the level of the grammar.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>vanderklis-lebruyn-deswart:2017:EACLshort</bibkey>
  </paper>

  <paper id="2081">
    <title>Efficient, Compositional, Order-sensitive n-gram Embeddings</title>
    <author><first>Adam</first><last>Poliak</last></author>
    <author><first>Pushpendre</first><last>Rastogi</last></author>
    <author><first>M. Patrick</first><last>Martin</last></author>
    <author><first>Benjamin</first><last>Van Durme</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>503&#8211;508</pages>
    <url>http://www.aclweb.org/anthology/E17-2081</url>
    <abstract>We propose ECO: a new way to generate embeddings for phrases that is Efficient,
	Compositional, and Order-sensitive. Our method creates decompositional
	embeddings for words offline and combines them to create new embeddings for
	phrases in real time. Unlike other approaches, ECO can create embeddings for
	phrases not seen during training. We evaluate ECO on supervised and
	unsupervised tasks and demonstrate that creating phrase embeddings that are
	sensitive to word order can help downstream tasks.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>poliak-EtAl:2017:EACLshort</bibkey>
  </paper>

  <paper id="2082">
    <title>Integrating Semantic Knowledge into Lexical Embeddings Based on Information Content Measurement</title>
    <author><first>Hsin-Yang</first><last>Wang</last></author>
    <author><first>Wei-Yun</first><last>Ma</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>509&#8211;515</pages>
    <url>http://www.aclweb.org/anthology/E17-2082</url>
    <abstract>Distributional word representations are widely used in NLP tasks. These
	representations are based on an assumption that words with a similar context
	tend to have a similar meaning. To improve the quality of the context-based
	embeddings, many researches have explored how to make full use of existing
	lexical resources. In this paper, we argue that while we incorporate the prior
	knowledge with context-based embeddings, words with different occurrences
	should be treated differently. Therefore, we propose to rely on the measurement
	of information content to control the degree of applying prior knowledge into
	context-based embeddings - different words would have different learning rates
	when adjusting their embeddings. In the result, we demonstrate that our
	embeddings get significant improvements on two different tasks: Word Similarity
	and Analogical Reasoning.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>wang-ma:2017:EACLshort</bibkey>
  </paper>

  <paper id="2083">
    <title>Improving Neural Knowledge Base Completion with Cross-Lingual Projections</title>
    <author><first>Patrick</first><last>Klein</last></author>
    <author><first>Simone Paolo</first><last>Ponzetto</last></author>
    <author><first>Goran</first><last>Glava&#x161;</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>516&#8211;522</pages>
    <url>http://www.aclweb.org/anthology/E17-2083</url>
    <abstract>In this paper we present a cross-lingual extension of a neural tensor network
	model for knowledge base completion. We exploit multilingual synsets from
	BabelNet to translate English triples to other languages and then augment the
	reference knowledge base with cross-lingual triples. We project monolingual
	embeddings of different languages to a shared multilingual space and use them
	for network initialization (i.e., as initial concept embeddings). We then train
	the network with triples from the cross-lingually augmented knowledge base.
	Results on WordNet link prediction show that leveraging cross-lingual
	information yields significant gains over exploiting only monolingual triples.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>klein-ponzetto-glavavs:2017:EACLshort</bibkey>
  </paper>

  <paper id="2084">
    <title>Modelling metaphor with attribute-based semantics</title>
    <author><first>Luana</first><last>Bulat</last></author>
    <author><first>Stephen</first><last>Clark</last></author>
    <author><first>Ekaterina</first><last>Shutova</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>523&#8211;528</pages>
    <url>http://www.aclweb.org/anthology/E17-2084</url>
    <abstract>One of the key problems in computational metaphor modelling is finding the
	optimal level of abstraction of semantic representations, such that these are
	able to capture and generalise metaphorical mechanisms. In this paper we
	present the first metaphor identification method that uses representations
	constructed from property norms. Such norms have been previously shown to
	provide a cognitively plausible representation of concepts in terms of semantic
	properties. Our results demonstrate that such property-based semantic
	representations provide a suitable model of cross-domain knowledge projection
	in metaphors, outperforming standard distributional models on a metaphor
	identification task.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>bulat-clark-shutova:2017:EACLshort</bibkey>
  </paper>

  <paper id="2085">
    <title>When a Red Herring in Not a Red Herring: Using Compositional Methods to Detect Non-Compositional Phrases</title>
    <author><first>Julie</first><last>Weeds</last></author>
    <author><first>Thomas</first><last>Kober</last></author>
    <author><first>Jeremy</first><last>Reffin</last></author>
    <author><first>David</first><last>Weir</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>529&#8211;534</pages>
    <url>http://www.aclweb.org/anthology/E17-2085</url>
    <abstract>Non-compositional phrases such as red herring and weakly compositional
	phrases such as spelling bee are an integral part of natural language
	(Sag, 2002).  They are also the phrases that are difficult, or even
	impossible, for good compositional distributional models of semantics. 
	Compositionality detection therefore provides a good testbed for compositional
	methods. We compare an integrated compositional distributional approach, using
	sparse high dimensional representations, with the ad-hoc compositional approach
	of applying simple composition operations to state-of-the-art neural
	embeddings.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>weeds-EtAl:2017:EACLshort</bibkey>
  </paper>

  <paper id="2086">
    <title>Applying Multi-Sense Embeddings for German Verbs to Determine Semantic Relatedness and to Detect Non-Literal Language</title>
    <author><first>Maximilian</first><last>K&#246;per</last></author>
    <author><first>Sabine</first><last>Schulte im Walde</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>535&#8211;542</pages>
    <url>http://www.aclweb.org/anthology/E17-2086</url>
    <abstract>To date, the majority of computational models still determines the semantic
	relatedness between words (or larger linguistic units) on the type level. In
	this paper, we compare and extend multi-sense embeddings, in order to model and
	utilise
	word senses on the token level. We focus on the challenging class of complex
	verbs,
	and evaluate the model variants on various semantic tasks: semantic
	classification;
	predicting compositionality; and detecting non-literal language usage. While
	there is no overall best model, all models significantly outperform a word2vec
	single-sense skip baseline, thus demonstrating the need to distinguish between
	word senses in a distributional semantic model.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>koper-schulteimwalde:2017:EACLshort</bibkey>
  </paper>

  <paper id="2087">
    <title>Negative Sampling Improves Hypernymy Extraction Based on Projection Learning</title>
    <author><first>Dmitry</first><last>Ustalov</last></author>
    <author><first>Nikolay</first><last>Arefyev</last></author>
    <author><first>Chris</first><last>Biemann</last></author>
    <author><first>Alexander</first><last>Panchenko</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>543&#8211;550</pages>
    <url>http://www.aclweb.org/anthology/E17-2087</url>
    <abstract>We present a new approach to extraction of hypernyms based on projection
	learning and word embeddings. In contrast to classification-based approaches,
	projection-based methods require no candidate hyponym-hypernym pairs. While it
	is natural to use both positive and negative training examples in supervised
	relation extraction, the impact of negative examples on hypernym prediction was
	not studied so far. In this paper, we show that explicit negative examples used
	for regularization of the model significantly improve performance compared to
	the state-of-the-art approach of Fu et al. (2014) on three datasets from
	different languages.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>ustalov-EtAl:2017:EACLshort</bibkey>
  </paper>

  <paper id="2088">
    <title>A Dataset for Multi-Target Stance Detection</title>
    <author><first>Parinaz</first><last>Sobhani</last></author>
    <author><first>Diana</first><last>Inkpen</last></author>
    <author><first>Xiaodan</first><last>Zhu</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>551&#8211;557</pages>
    <url>http://www.aclweb.org/anthology/E17-2088</url>
    <abstract>Current models for stance classification often treat each target independently,
	but in many applications, there exist natural dependencies among targets, e.g.,
	stance towards two or more politicians in an election or towards several brands
	of the same product. In this paper, we focus on the problem of multi-target
	stance detection. We present a new dataset that we built for this task.
	Furthermore, we experiment with several neural models on the dataset and show
	that they are more effective in jointly modeling the overall position towards
	two related targets compared to independent predictions and other models of
	joint learning, such as cascading classification. We make the new dataset
	publicly available, in order to facilitate further research in multi-target
	stance classification.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>sobhani-inkpen-zhu:2017:EACLshort</bibkey>
  </paper>

  <paper id="2089">
    <title>Single and Cross-domain Polarity Classification using String Kernels</title>
    <author><first>Rosa M.</first><last>Gim&#233;nez-P&#233;rez</last></author>
    <author><first>Marc</first><last>Franco-Salvador</last></author>
    <author><first>Paolo</first><last>Rosso</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>558&#8211;563</pages>
    <url>http://www.aclweb.org/anthology/E17-2089</url>
    <abstract>The polarity classification task aims at automatically identifying  whether a
	subjective text is positive or negative. When the target domain is different
	from those where a model was trained, we refer to a cross-domain setting. That
	setting usually implies the use of a domain adaptation method. In this work, we
	study the single and cross-domain polarity classification tasks from the string
	kernels perspective. Contrary to classical domain adaptation methods, which
	employ texts from both domains to detect pivot features, we do not use the
	target domain for training. Our approach detects the lexical peculiarities that
	characterise the text polarity and maps them into a domain independent space by
	means of kernel discriminant analysis. Experimental results show
	state-of-the-art performance in single and cross-domain polarity
	classification.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>gimenezperez-francosalvador-rosso:2017:EACLshort</bibkey>
  </paper>

  <paper id="2090">
    <title>Predicting Emotional Word Ratings using Distributional Representations and Signed Clustering</title>
    <author><first>Joao</first><last>Sedoc</last></author>
    <author><first>Daniel</first><last>Preo&#x21B;iuc-Pietro</last></author>
    <author><first>Lyle</first><last>Ungar</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>564&#8211;571</pages>
    <url>http://www.aclweb.org/anthology/E17-2090</url>
    <abstract>Inferring the emotional content of words is important for text-based sentiment
	analysis, dialogue systems and psycholinguistics, but word ratings are
	expensive to collect at scale and across languages or domains. We develop a
	method that automatically extends word-level ratings to unrated words using
	signed clustering of vector space word representations along with affect
	ratings. We use our method to determine a word's valence and arousal, which
	determine its position on the circumplex model of affect, the most popular
	dimensional model of emotion. Our method achieves superior out-of-sample word
	rating prediction on both affective dimensions across three different languages
	when compared to state-of-the-art word similarity based methods. Our method can
	assist building word ratings for new languages and improve downstream tasks
	such as sentiment analysis and emotion detection.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>sedoc-preoctiucpietro-ungar:2017:EACLshort</bibkey>
  </paper>

  <paper id="2091">
    <title>Attention Modeling for Targeted Sentiment</title>
    <author><first>Jiangming</first><last>Liu</last></author>
    <author><first>Yue</first><last>Zhang</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>572&#8211;577</pages>
    <url>http://www.aclweb.org/anthology/E17-2091</url>
    <abstract>Neural network models have been used for target-dependent sentiment analysis.
	Previous work focus on learning a target specific representation for a given
	input sentence which is used for classification. However, they do not
	explicitly model the contribution of each word in a sentence with respect to
	targeted sentiment polarities. We investigate an attention model to this end.
	In particular, a vanilla LSTM model is used to induce an attention value of the
	whole sentence. The model is further extended to differentiate left and right
	contexts given a certain target following previous work. Results show that by
	using attention to model the contribution of each word with respect to the
	target, our model gives significantly improved results over two standard
	benchmarks. We report the best accuracy for this task.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>liu-zhang:2017:EACLshort</bibkey>
  </paper>

  <paper id="2092">
    <title>EmoBank: Studying the Impact of Annotation Perspective and Representation Format on Dimensional Emotion Analysis</title>
    <author><first>Sven</first><last>Buechel</last></author>
    <author><first>Udo</first><last>Hahn</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>578&#8211;585</pages>
    <url>http://www.aclweb.org/anthology/E17-2092</url>
    <abstract>We describe EmoBank, a corpus of 10k English sentences balancing multiple
	genres, which we annotated with dimensional emotion metadata in the
	Valence-Arousal-Dominance (VAD) representation format. EmoBank excels with a
	bi-perspectival and bi-representational design. On the one hand, we distinguish
	between writer's and reader's emotions, on the other hand, a subset of the
	corpus complements dimensional VAD annotations with categorical ones based on
	Basic Emotions. We find evidence for the supremacy of the reader's
	perspective in terms of IAA and rating intensity, and achieve close-to-human
	performance when mapping between dimensional and categorical formats.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>buechel-hahn:2017:EACLshort</bibkey>
  </paper>

  <paper id="2093">
    <title>Structural Attention Neural Networks for improved sentiment analysis</title>
    <author><first>Filippos</first><last>Kokkinos</last></author>
    <author><first>Alexandros</first><last>Potamianos</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>586&#8211;591</pages>
    <url>http://www.aclweb.org/anthology/E17-2093</url>
    <abstract>We introduce a tree-structured attention neural network for sentences and small
	phrases and apply it to the problem of sentiment classification. Our model
	expands the current recursive models by incorporating structural information
	around a node of a syntactic tree using both  bottom-up and top-down
	information propagation. Also, the model utilizes structural attention to
	identify the most salient representations during the construction of the
	syntactic tree.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>kokkinos-potamianos:2017:EACLshort</bibkey>
  </paper>

  <paper id="2094">
    <title>Ranking Convolutional Recurrent Neural Networks for Purchase Stage Identification on Imbalanced Twitter Data</title>
    <author><first>Heike</first><last>Adel</last></author>
    <author><first>Francine</first><last>Chen</last></author>
    <author><first>Yan-Ying</first><last>Chen</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>592&#8211;598</pages>
    <url>http://www.aclweb.org/anthology/E17-2094</url>
    <abstract>Users often use social media to share their interest in products. We propose to
	identify purchase stages from Twitter data following the AIDA model (Awareness,
	Interest, Desire, Action). In particular, we define the task of classifying the
	purchase stage of each tweet in a user's tweet sequence. We introduce RCRNN,
	a Ranking Convolutional Recurrent Neural Network which computes tweet
	representations using convolution over word embeddings and models a tweet
	sequence with gated recurrent units. Also, we consider various methods to cope
	with the imbalanced label distribution in our data and show that a ranking
	layer outperforms class weights.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>adel-chen-chen:2017:EACLshort</bibkey>
  </paper>

  <paper id="2095">
    <title>Context-Aware Graph Segmentation for Graph-Based Translation</title>
    <author><first>Liangyou</first><last>Li</last></author>
    <author><first>Andy</first><last>Way</last></author>
    <author><first>Qun</first><last>Liu</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>599&#8211;604</pages>
    <url>http://www.aclweb.org/anthology/E17-2095</url>
    <abstract>In this paper, we present an improved graph-based translation model which
	segments an input graph into node-induced subgraphs by taking source context
	into consideration. Translations are generated by combining subgraph
	translations left-to-right using beam search. Experiments on Chinese&#8211;English
	and German&#8211;English demonstrate that the context-aware segmentation
	significantly improves the baseline graph-based model.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>li-way-liu:2017:EACLshort</bibkey>
  </paper>

  <paper id="2096">
    <title>Reranking Translation Candidates Produced by Several Bilingual Word Similarity Sources</title>
    <author><first>Laurent</first><last>Jakubina</last></author>
    <author><first>Phillippe</first><last>Langlais</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>605&#8211;611</pages>
    <url>http://www.aclweb.org/anthology/E17-2096</url>
    <abstract>We investigate the reranking of the output of several distributional approaches
	on the Bilingual Lexicon Induction task.  We show that reranking an n-best list
	produced by any of those approaches leads to very substantial improvements. We
	further demonstrate that combining several n-best lists by reranking is an
	effective way of further boosting performance.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>jakubina-langlais:2017:EACLshort</bibkey>
  </paper>

  <paper id="2097">
    <title>Lexicalized Reordering for Left-to-Right Hierarchical Phrase-based Translation</title>
    <author><first>Maryam</first><last>Siahbani</last></author>
    <author><first>Anoop</first><last>Sarkar</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>612&#8211;618</pages>
    <url>http://www.aclweb.org/anthology/E17-2097</url>
    <abstract>Phrase-based and hierarchical phrase-based (Hiero) translation models differ
	radically in the way reordering is modeled. Lexicalized reordering models play
	an important role in phrase-based MT and such models have been added to
	CKY-based decoders for Hiero. Watanabe et al. (2006) proposed a promising
	decoding algorithm for Hiero (LR-Hiero) that visits input spans in arbitrary
	order and produces the translation in left to right (LR) order which leads to
	far fewer language model calls and leads to a considerable speedup in decoding.
	We introduce a novel shift-reduce algorithm to LR-Hiero to decode with our
	lexicalized reordering model (LRM) and show that it improves translation
	quality for Czech-English, Chinese-English and German-English.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>siahbani-sarkar:2017:EACLshort</bibkey>
  </paper>

  <paper id="2098">
    <title>Bootstrapping Unsupervised Bilingual Lexicon Induction</title>
    <author><first>Bradley</first><last>Hauer</last></author>
    <author><first>Garrett</first><last>Nicolai</last></author>
    <author><first>Grzegorz</first><last>Kondrak</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>619&#8211;624</pages>
    <url>http://www.aclweb.org/anthology/E17-2098</url>
    <abstract>The task of unsupervised lexicon induction is to find translation pairs across
	monolingual corpora. We develop a novel method that creates seed lexicons by
	identifying cognates in the vocabularies of related languages on the basis of
	their frequency and lexical similarity. We apply bidirectional bootstrapping to
	a method which learns a linear mapping between context-based vector spaces.
	Experimental results on three language pairs show consistent improvement over
	prior work.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>hauer-nicolai-kondrak:2017:EACLshort</bibkey>
  </paper>

  <paper id="2099">
    <title>Addressing Problems across Linguistic Levels in SMT: Combining Approaches to Model Morphology, Syntax and Lexical Choice</title>
    <author><first>Marion</first><last>Weller-Di Marco</last></author>
    <author><first>Alexander</first><last>Fraser</last></author>
    <author><first>Sabine</first><last>Schulte im Walde</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>625&#8211;630</pages>
    <url>http://www.aclweb.org/anthology/E17-2099</url>
    <abstract>Many errors in phrase-based SMT can be attributed to problems on three 
	linguistic levels: morphological complexity in the target language, 
	structural differences and lexical choice. We explore combinations of 
	linguistically motivated approaches to address these problems in 
	English-to-German SMT and show that they are complementary to one another,
	but also that the popular verbal pre-ordering can cause problems on the 
	morphological and lexical level.
	A discriminative classifier can overcome these problems, in particular 
	when enriching standard lexical features with features geared towards 
	verbal inflection.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>wellerdimarco-fraser-schulteimwalde:2017:EACLshort</bibkey>
  </paper>

  <paper id="2100">
    <title>Machine Translation of Spanish Personal and Possessive Pronouns Using Anaphora Probabilities</title>
    <author><first>Ngoc Quang</first><last>Luong</last></author>
    <author><first>Andrei</first><last>Popescu-Belis</last></author>
    <author><first>Annette</first><last>Rios Gonzales</last></author>
    <author><first>Don</first><last>Tuggener</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>631&#8211;636</pages>
    <url>http://www.aclweb.org/anthology/E17-2100</url>
    <abstract>We implement a fully probabilistic model to combine the hypotheses of a Spanish
	anaphora resolution system with those of a Spanish-English machine translation
	system.  The probabilities over antecedents are converted into probabilities
	for the features of translated pronouns, and are integrated with phrase-based
	MT using an additional translation model for pronouns. The system improves the
	translation of several Spanish personal and possessive pronouns into English,
	by solving translation divergencies such as 'ella' vs. 'she'/'it' or 'su' vs.
	'his'/'her'/'its'/'their'.  On a test set with 2,286 pronouns, a baseline
	system correctly translates 1,055 of them, while ours improves this by 41. 
	Moreover, with oracle antecedents, possessives are translated with an accuracy
	of 83%.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>luong-EtAl:2017:EACLshort</bibkey>
  </paper>

  <paper id="2101">
    <title>Using Images to Improve Machine-Translating E-Commerce Product Listings.</title>
    <author><first>Iacer</first><last>Calixto</last></author>
    <author><first>Daniel</first><last>Stein</last></author>
    <author><first>Evgeny</first><last>Matusov</last></author>
    <author><first>Pintu</first><last>Lohar</last></author>
    <author><first>Sheila</first><last>Castilho</last></author>
    <author><first>Andy</first><last>Way</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>637&#8211;643</pages>
    <url>http://www.aclweb.org/anthology/E17-2101</url>
    <abstract>In this paper we study the impact of using images to machine-translate
	user-generated e-commerce product listings. We study how a multi-modal Neural
	Machine Translation (NMT) model compares to two text-only approaches: a
	conventional state-of-the-art attentional NMT and a Statistical Machine
	Translation (SMT) model. User-generated product listings often do not
	constitute grammatical or well-formed sentences. More often than not, they
	consist of the juxtaposition of short phrases or keywords. We train our models
	end-to-end as well as use text-only and multi-modal NMT models for re-ranking
	n-best lists generated by an SMT model. We qualitatively evaluate our
	user-generated training data and also analyse how adding synthetic data impacts the
	results. We evaluate our models quantitatively using BLEU and TER and find that
	(i) additional synthetic data has a general positive impact on text-only and
	multi-modal NMT models, and that (ii) using a multi-modal NMT model for
	re-ranking n-best lists improves TER significantly across different n-best list
	sizes.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>calixto-EtAl:2017:EACLshort</bibkey>
  </paper>

  <paper id="2102">
    <title>Continuous multilinguality with language vectors</title>
    <author><first>Robert</first><last>&#214;stling</last></author>
    <author><first>J&#246;rg</first><last>Tiedemann</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>644&#8211;649</pages>
    <url>http://www.aclweb.org/anthology/E17-2102</url>
    <abstract>Most existing models for multilingual natural language processing (NLP)
	treat language as a discrete category, and make predictions for either
	one language or the other. In contrast, we propose using continuous vector
	representations of language. We show that these can be learned efficiently
	with a character-based neural language model, and used to improve
	inference about language varieties not seen during training.
	In experiments with 1303 Bible translations into 990 different languages,
	we empirically explore the capacity of multilingual language models,
	and also show that the language vectors capture genetic
	relationships between languages.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>ostling-tiedemann:2017:EACLshort</bibkey>
  </paper>

  <paper id="2103">
    <title>Unsupervised Training for Large Vocabulary Translation Using Sparse Lexicon and Word Classes</title>
    <author><first>Yunsu</first><last>Kim</last></author>
    <author><first>Julian</first><last>Schamper</last></author>
    <author><first>Hermann</first><last>Ney</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>650&#8211;656</pages>
    <url>http://www.aclweb.org/anthology/E17-2103</url>
    <abstract>We address for the first time unsupervised training for a translation task with
	hundreds of thousands of vocabulary words. We scale up the
	expectation-maximization (EM) algorithm to learn a large translation table
	without any parallel text or seed lexicon. First, we solve the memory
	bottleneck and enforce the sparsity with a simple thresholding scheme for the
	lexicon. Second, we initialize the lexicon training with word classes, which
	efficiently boosts the performance. Our methods produced promising results on
	two large-scale unsupervised translation tasks.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>kim-schamper-ney:2017:EACLshort</bibkey>
  </paper>

  <paper id="2104">
    <title>Co-reference Resolution of Elided Subjects and Possessive Pronouns in Spanish-English Statistical Machine Translation</title>
    <author><first>Annette</first><last>Rios Gonzales</last></author>
    <author><first>Don</first><last>Tuggener</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>657&#8211;662</pages>
    <url>http://www.aclweb.org/anthology/E17-2104</url>
    <abstract>This paper presents a straightforward method to integrate co-reference
	information into phrase-based machine translation to address the problems of 
	i) elided subjects and ii) morphological underspecification of pronouns when
	translating from pro-drop languages.
	We evaluate the method for the language pair Spanish-English and find that
	translation quality improves with the addition of co-reference information.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>riosgonzales-tuggener:2017:EACLshort</bibkey>
  </paper>

  <paper id="2105">
    <title>Large-Scale Categorization of Japanese Product Titles Using Neural Attention Models</title>
    <author><first>Yandi</first><last>Xia</last></author>
    <author><first>Aaron</first><last>Levine</last></author>
    <author><first>Pradipto</first><last>Das</last></author>
    <author><first>Giuseppe</first><last>Di Fabbrizio</last></author>
    <author><first>Keiji</first><last>Shinzato</last></author>
    <author><first>Ankur</first><last>Datta</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>663&#8211;668</pages>
    <url>http://www.aclweb.org/anthology/E17-2105</url>
    <abstract>We propose a variant of Convolutional Neural Network (CNN) models, the
	Attention CNN (ACNN), for large-scale categorization of millions of Japanese
	items  into thirty-five product categories.
	Compared to a state-of-the-art Gradient Boosted Tree (GBT) classifier, the
	proposed model reduces training time from three weeks to three days while
	maintaining more than 96% accuracy.
	Additionally, our proposed model characterizes products by imputing attentive
	focus on word tokens in a language agnostic way.
	The attention words have been observed to be semantically highly correlated
	with the predicted categories and give us a choice of automatic feature
	extraction for downstream processing.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>xia-EtAl:2017:EACLshort</bibkey>
  </paper>

  <paper id="2106">
    <title>Convolutional Neural Networks for Authorship Attribution of Short Texts</title>
    <author><first>Prasha</first><last>Shrestha</last></author>
    <author><first>Sebastian</first><last>Sierra</last></author>
    <author><first>Fabio</first><last>Gonzalez</last></author>
    <author><first>Manuel</first><last>Montes</last></author>
    <author><first>Paolo</first><last>Rosso</last></author>
    <author><first>Thamar</first><last>Solorio</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>669&#8211;674</pages>
    <url>http://www.aclweb.org/anthology/E17-2106</url>
    <abstract>We present a model to perform authorship attribution of tweets using
	Convolutional Neural Networks (CNNs) over character n-grams. We also present a
	strategy that improves model interpretability by estimating the importance of
	input text fragments in the predicted classification.
	The experimental evaluation shows that text CNNs perform competitively and are
	able to outperform previous methods.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>shrestha-EtAl:2017:EACLshort</bibkey>
  </paper>

  <paper id="2107">
    <title>Aspect Extraction from Product Reviews Using Category Hierarchy Information</title>
    <author><first>Yinfei</first><last>Yang</last></author>
    <author><first>Cen</first><last>Chen</last></author>
    <author><first>Minghui</first><last>Qiu</last></author>
    <author><first>Forrest</first><last>Bao</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>675&#8211;680</pages>
    <url>http://www.aclweb.org/anthology/E17-2107</url>
    <abstract>Aspect extraction abstracts the common properties of objects from corpora
	discussing them, such as reviews of products.
	Recent work on aspect extraction is leveraging the hierarchical relationship
	between products and their categories. 
	However, such effort focuses on the aspects of child categories but ignores
	those from parent categories.
	Hence, we propose an LDA-based generative topic model inducing the two-layer
	categorical information (CAT-LDA), to balance the aspects of both a parent
	category and its child categories.
	Our hypothesis is that child categories inherit aspects from parent categories,
	controlled by the hierarchy between them. 
	Experimental results on 5 categories of Amazon.com products show that both
	common aspects of parent category and the individual aspects of sub-categories
	can be extracted to align well with the common sense. 
	We further evaluate the manually extracted aspects of 16 products, resulting in
	an average hit rate of 79.10%.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>yang-EtAl:2017:EACLshort2</bibkey>
  </paper>

  <paper id="2108">
    <title>On the Relevance of Syntactic and Discourse Features for Author Profiling and Identification</title>
    <author><first>Juan</first><last>Soler</last></author>
    <author><first>Leo</first><last>Wanner</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>681&#8211;687</pages>
    <url>http://www.aclweb.org/anthology/E17-2108</url>
    <abstract>The majority of approaches to author profiling and author identification focus
	mainly on lexical features, i.e., on the content of a text. We argue that
	syntactic and discourse features play a significantly more prominent role than
	they were given in the past. We show that they achieve state-of-the-art
	performance in author and gender identification on a literary corpus while
	keeping the feature set small: the used feature set is composed of only 188
	features and still outperforms the winner of the PAN 2014 shared task on author
	verification in the literary genre.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>soler-wanner:2017:EACLshort</bibkey>
  </paper>

  <paper id="2109">
    <title>Unsupervised Cross-Lingual Scaling of Political Texts</title>
    <author><first>Goran</first><last>Glava&#x161;</last></author>
    <author><first>Federico</first><last>Nanni</last></author>
    <author><first>Simone Paolo</first><last>Ponzetto</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>688&#8211;693</pages>
    <url>http://www.aclweb.org/anthology/E17-2109</url>
    <abstract>Political text scaling aims to linearly order parties and politicians across
	political dimensions (e.g., left-to-right ideology) based on textual content
	(e.g., politician speeches or party manifestos). Existing models scale texts
	based on relative word usage and cannot be used for cross-lingual analyses.
	Additionally, there is little quantitative evidence that the output of these
	models correlates with common political dimensions like left-to-right
	orientation. Experimental results show that the semantically-informed scaling
	models better predict the party positions than the existing word-based models
	in two different political dimensions. Furthermore, the proposed models exhibit
	no drop in performance in the cross-lingual compared to monolingual setting.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>glavavs-nanni-ponzetto:2017:EACLshort</bibkey>
  </paper>

  <paper id="2110">
    <title>Neural Networks for Joint Sentence Classification in Medical Paper Abstracts</title>
    <author><first>Franck</first><last>Dernoncourt</last></author>
    <author><first>Ji Young</first><last>Lee</last></author>
    <author><first>Peter</first><last>Szolovits</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>694&#8211;700</pages>
    <url>http://www.aclweb.org/anthology/E17-2110</url>
    <abstract>Existing models based on artificial neural networks (ANNs) for sentence
	classification often do not incorporate the context in which sentences appear,
	and classify sentences individually. However, traditional sentence
	classification approaches have been shown to greatly benefit from jointly
	classifying subsequent sentences, such as with conditional random fields. In
	this work, we present an ANN architecture that combines the effectiveness of
	typical ANN models to classify sentences in isolation, with the strength of
	structured prediction. Our model outperforms the state-of-the-art results on
	two different datasets for sequential sentence classification in medical
	abstracts.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>dernoncourt-lee-szolovits:2017:EACLshort</bibkey>
  </paper>

  <paper id="2111">
    <title>Multimodal Topic Labelling</title>
    <author><first>Ionut</first><last>Sorodoc</last></author>
    <author><first>Jey Han</first><last>Lau</last></author>
    <author><first>Nikolaos</first><last>Aletras</last></author>
    <author><first>Timothy</first><last>Baldwin</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>701&#8211;706</pages>
    <url>http://www.aclweb.org/anthology/E17-2111</url>
    <abstract>Topics generated by topic models are typically presented as a list of topic
	terms. Automatic topic labelling is the task of generating a succinct label
	that summarises the theme or subject of a topic, with the intention of reducing
	the cognitive load of end-users when interpreting these topics. Traditionally,
	topic label systems focus on a single label modality, e.g. textual labels. In
	this work we propose a multimodal approach to topic labelling using a simple
	feedforward neural network. Given a topic and a candidate image or textual
	label, our method automatically generates a rating for the label, relative to
	the topic. Experiments show that this multimodal approach outperforms
	single-modality topic labelling systems.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>sorodoc-EtAl:2017:EACLshort</bibkey>
  </paper>

  <paper id="2112">
    <title>Detecting (Un)Important Content for Single-Document News Summarization</title>
    <author><first>Yinfei</first><last>Yang</last></author>
    <author><first>Forrest</first><last>Bao</last></author>
    <author><first>Ani</first><last>Nenkova</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>707&#8211;712</pages>
    <url>http://www.aclweb.org/anthology/E17-2112</url>
    <abstract>We present a robust approach for detecting intrinsic sentence importance in
	news, by training on two corpora of document-summary pairs. When used for
	single-document summarization, our approach, combined with the &#x201c;beginning of
	document&#x201d; heuristic, outperforms a state-of-the-art summarizer and the
	beginning-of-article baseline in both automatic and manual evaluations. These
	results represent an important advance because in the absence of cross-document
	repetition, single document summarizers for news have not been able to
	consistently outperform the strong beginning-of-article baseline.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>yang-bao-nenkova:2017:EACLshort</bibkey>
  </paper>

  <paper id="2113">
    <title>F-Score Driven Max Margin Neural Network for Named Entity Recognition in Chinese Social Media</title>
    <author><first>Hangfeng</first><last>He</last></author>
    <author><first>Xu</first><last>Sun</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>713&#8211;718</pages>
    <url>http://www.aclweb.org/anthology/E17-2113</url>
    <abstract>We focus on named entity recognition (NER) for Chinese social media. With
	massive unlabeled text and quite limited labelled corpus, we propose a
	semi-supervised learning model based on B-LSTM neural network. To take
	advantage of traditional methods in NER such as CRF, we combine transition
	probability with deep learning in our model. To bridge the gap between label
	accuracy and F-score of NER, we construct a model which can be directly trained
	on F-score. When considering the instability of F-score driven method and
	meaningful information provided by label accuracy, we propose an integrated
	method to train on both F-score and label accuracy. Our integrated model yields
	7.44% improvement over previous state-of-the-art result.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>he-sun:2017:EACLshort</bibkey>
  </paper>

  <paper id="2114">
    <title>Discriminative Information Retrieval for Question Answering Sentence Selection</title>
    <author><first>Tongfei</first><last>Chen</last></author>
    <author><first>Benjamin</first><last>Van Durme</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>719&#8211;725</pages>
    <url>http://www.aclweb.org/anthology/E17-2114</url>
    <abstract>We propose a framework for discriminative IR atop linguistic features, trained
	to improve the recall of answer candidate passage retrieval, the initial step
	in
	text-based question answering. We formalize this as an instance of linear
	feature-based IR, demonstrating a 34%-43% improvement in recall for candidate
	triage for QA.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>chen-vandurme:2017:EACLshort</bibkey>
  </paper>

  <paper id="2115">
    <title>Effective shared representations with Multitask Learning for Community Question Answering</title>
    <author><first>Daniele</first><last>Bonadiman</last></author>
    <author><first>Antonio</first><last>Uva</last></author>
    <author><first>Alessandro</first><last>Moschitti</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>726&#8211;732</pages>
    <url>http://www.aclweb.org/anthology/E17-2115</url>
    <abstract>An important asset of using Deep Neural Networks (DNNs) for text applications
	is their ability to automatically engineer features.
	Unfortunately, DNNs usually require a lot of training data, especially for
	highly semantic tasks such as community Question Answering (cQA). In this
	paper, we tackle the problem of data scarcity by learning the target DNN
	together with two auxiliary tasks in a multitask learning setting. We exploit
	the strong semantic connection between selection of comments relevant to (i)
	new questions and (ii) forum questions. This enables a global representation
	for comments, new and previous questions.
	The experiments of our model on a SemEval challenge dataset for cQA show a 20%
	relative improvement over standard DNNs.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>bonadiman-uva-moschitti:2017:EACLshort</bibkey>
  </paper>

  <paper id="2116">
    <title>Learning User Embeddings from Emails</title>
    <author><first>Yan</first><last>Song</last></author>
    <author><first>Chia-Jung</first><last>Lee</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>733&#8211;738</pages>
    <url>http://www.aclweb.org/anthology/E17-2116</url>
    <abstract>Many important email-related tasks, such as email classification or search,
	highly rely on building quality document representations (e.g., bag-of-words or
	key phrases)  to assist matching and understanding. 
	Despite prior success on representing textual messages, creating quality user
	representations from emails was overlooked. In this paper, we propose to 
	represent users using embeddings that are trained to reflect the email
	communication network. Our experiments on Enron dataset suggest that the
	resulting embeddings capture the semantic distance between users. To assess the
	quality of embeddings in a real-world application, we carry out  auto-foldering
	task where the lexical representation of an email is enriched with user
	embedding features. Our results show that folder prediction accuracy is
	improved when embedding features are present across multiple settings.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>song-lee:2017:EACLshort</bibkey>
  </paper>

  <paper id="2117">
    <title>Temporal information extraction from clinical text</title>
    <author><first>Julien</first><last>Tourille</last></author>
    <author><first>Olivier</first><last>Ferret</last></author>
    <author><first>Xavier</first><last>Tannier</last></author>
    <author><first>Aurelie</first><last>Neveol</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>739&#8211;745</pages>
    <url>http://www.aclweb.org/anthology/E17-2117</url>
    <abstract>In this paper, we present a method for temporal relation extraction from
	clinical narratives in French and in English. We experiment on two comparable
	corpora, the MERLOT corpus and the THYME corpus, and show that a common
	approach can be used for both languages.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>tourille-EtAl:2017:EACLshort</bibkey>
  </paper>

  <paper id="2118">
    <title>Neural Temporal Relation Extraction</title>
    <author><first>Dmitriy</first><last>Dligach</last></author>
    <author><first>Timothy</first><last>Miller</last></author>
    <author><first>Chen</first><last>Lin</last></author>
    <author><first>Steven</first><last>Bethard</last></author>
    <author><first>Guergana</first><last>Savova</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>746&#8211;751</pages>
    <url>http://www.aclweb.org/anthology/E17-2118</url>
    <abstract>We experiment with neural architectures for temporal relation extraction and
	establish a new state-of-the-art for several scenarios. We find that neural
	models with only tokens as input outperform state-of-the-art hand-engineered
	feature-based models, that convolutional neural networks outperform LSTM
	models, and
	that encoding relation arguments with XML tags outperforms a traditional
	position-based encoding.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>dligach-EtAl:2017:EACLshort</bibkey>
  </paper>

  <paper id="2119">
    <title>End-to-End Trainable Attentive Decoder for Hierarchical Entity Classification</title>
    <author><first>Sanjeev</first><last>Karn</last></author>
    <author><first>Ulli</first><last>Waltinger</last></author>
    <author><first>Hinrich</first><last>Sch&#252;tze</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>752&#8211;758</pages>
    <url>http://www.aclweb.org/anthology/E17-2119</url>
    <abstract>We address fine-grained entity classification and
	propose a novel attention-based recurrent neural network
	(RNN) encoder-decoder that generates paths in the type hierarchy and can be
	trained end-to-end.  
	We show that our model performs better
	on fine-grained entity classification than prior work that relies on
	flat or local classifiers that do not directly model
	hierarchical structure.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>karn-waltinger-schutze:2017:EACLshort</bibkey>
  </paper>

  <paper id="2120">
    <title>Neural Graphical Models over Strings for Principal Parts Morphological Paradigm Completion</title>
    <author><first>Ryan</first><last>Cotterell</last></author>
    <author><first>John</first><last>Sylak-Glassman</last></author>
    <author><first>Christo</first><last>Kirov</last></author>
    <booktitle>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
    <month>April</month>
    <year>2017</year>
    <address>Valencia, Spain</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>759&#8211;765</pages>
    <url>http://www.aclweb.org/anthology/E17-2120</url>
    <abstract>Many of the world's languages contain an abundance of inflected forms for each
	lexeme. A critical task in processing such languages is predicting these
	inflected forms. We develop a novel statistical model for the problem, drawing
	on graphical modeling techniques and recent advances in deep learning. We
	derive a Metropolis-Hastings algorithm to jointly decode the model. Our
	Bayesian network draws inspiration from principal parts morphological analysis.
	We demonstrate improvements on 5 languages.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>cotterell-sylakglassman-kirov:2017:EACLshort</bibkey>
  </paper>

</volume>

