% Volume-level record for the proceedings; the paper entries in this file repeat its title as their booktitle.
@Proceedings{C18-1:2018,
  editor    = {Bender, Emily M.  and  Derczynski, Leon  and  Isabelle, Pierre},
  title     = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  url       = {http://www.aclweb.org/anthology/C18-1}
}

% NOTE(review): the abstract below stops mid-sentence ("... and thus"); it looks truncated -- restore it from the published record.
@inproceedings{jahan-chauhan-finlayson:2018:C18-1,
    author = {Jahan, Labiba  and  Chauhan, Geeticka  and  Finlayson, Mark},
    title = {A New Approach to Animacy Detection},
    booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
    pages = {1--12},
    year = {2018},
    month = {August},
    publisher = {Association for Computational Linguistics},
    address = {Santa Fe, New Mexico, USA},
    url = {http://www.aclweb.org/anthology/C18-1001},
    abstract = {Animacy is a necessary property for a referent to be an agent, and thus},
}

@inproceedings{yin-EtAl:2018:C18-1,
    author = {Yin, Qingyu  and  Zhang, Yu  and  Zhang, Weinan  and  Liu, Ting  and  Wang, William Yang},
    title = {Zero Pronoun Resolution with Attention-based Neural Network},
    booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
    pages = {13--23},
    year = {2018},
    month = {August},
    publisher = {Association for Computational Linguistics},
    address = {Santa Fe, New Mexico, USA},
    url = {http://www.aclweb.org/anthology/C18-1002},
    abstract = {Recent neural network methods for zero pronoun resolution explore multiple models for generating representation vectors for zero pronouns and their candidate antecedents. Typically, contextual information is utilized to encode the zero pronouns since they are simply gaps that contain no actual content. To better utilize contexts of the zero pronouns, we here introduce the self-attention mechanism for encoding zero pronouns. With the help of the multiple hops of attention, our model is able to focus on some informative parts of the associated texts and therefore produces an efficient way of encoding the zero pronouns. In addition, an attention-based recurrent neural network is proposed for encoding candidate antecedents by their contents. Experiment results are encouraging: our proposed attention-based model gains the best performance on the Chinese portion of the OntoNotes corpus, substantially surpasses existing Chinese zero pronoun resolution baseline systems.},
}

@inproceedings{zhou-choi:2018:C18-1,
    author = {Zhou, Ethan  and  Choi, Jinho D.},
    title = {They Exist! Introducing Plural Mentions to Coreference Resolution and Entity Linking},
    booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
    pages = {24--34},
    year = {2018},
    month = {August},
    publisher = {Association for Computational Linguistics},
    address = {Santa Fe, New Mexico, USA},
    url = {http://www.aclweb.org/anthology/C18-1003},
    abstract = {This paper analyzes arguably the most challenging yet under-explored aspect of resolution tasks such as coreference resolution and entity linking, that is the resolution of plural mentions. Unlike singular mentions each of which represents one entity, plural mentions stand for multiple entities. To tackle this aspect, we take the character identification corpus from the SemEval 2018 shared task that consists of entity annotation for singular mentions, and expand it by adding annotation for plural mentions. We then introduce a novel coreference resolution algorithm that selectively creates clusters to handle both singular and plural mentions, and also a deep learning-based entity linking model that jointly handles both types of mentions through multi-task learning. Adjusted evaluation metrics are proposed for these tasks as well to handle the uniqueness of plural mentions. Our experiments show that the new coreference resolution and entity linking models significantly outperform traditional models designed only for singular mentions. To the best of our knowledge, this is the first time that plural mentions are thoroughly analyzed for these two resolution tasks.},
}

% The B-cubed metric name is written as inline math (B$^3$) in the abstract.
@InProceedings{meng-rumshisky:2018:C18-1,
  author    = {Meng, Yuanliang  and  Rumshisky, Anna},
  title     = {Triad-based Neural Network for Coreference Resolution},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {35--43},
  abstract  = {We propose a triad-based neural network system that generates affinity scores between entity mentions for coreference resolution. The system simultaneously accepts three mentions as input, taking mutual dependency and logical constraints of all three mentions into account, and thus makes more accurate predictions than the traditional pairwise approach. Depending on system choices, the affinity scores can be further used in clustering or mention ranking. Our experiments show that a standard hierarchical clustering using the scores produces state-of-art results with MUC and B$^3$ metrics on the English portion of CoNLL 2012 Shared Task. The model does not rely on many handcrafted features and is easy to train and use. The triads can also be easily extended to polyads of higher orders. To our knowledge, this is the first neural network system to model mutual dependency of more than two members at mention level.},
  url       = {http://www.aclweb.org/anthology/C18-1004}
}

@InProceedings{xu-EtAl:2018:C18-11,
  author    = {Xu, Hongzhi  and  Marcus, Mitchell  and  Yang, Charles  and  Ungar, Lyle},
  title     = {Unsupervised Morphology Learning with Statistical Paradigms},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {44--54},
  abstract  = {This paper describes an unsupervised model for morphological segmentation that exploits the notion of paradigms, which are sets of morphological categories (e.g., suffixes) that can be applied to a homogeneous set of words (e.g., nouns or verbs). Our algorithm identifies statistically reliable paradigms from the morphological segmentation result of a probabilistic model, and chooses reliable suffixes from them. The new suffixes can be fed back iteratively to improve the accuracy of the probabilistic model. Finally, the unreliable paradigms are subjected to pruning to eliminate unreliable morphological relations between words. The paradigm-based algorithm significantly improves segmentation accuracy. Our method achieves state-of-the-art results on experiments using the Morpho-Challenge data, including English, Turkish, and Finnish.},
  url       = {http://www.aclweb.org/anthology/C18-1005}
}

% {Americas} is braced so that sentence-casing bibliography styles keep the proper noun capitalised.
@InProceedings{mager-EtAl:2018:C18-1,
  author    = {Mager, Manuel  and  Gutierrez-Vasques, Ximena  and  Sierra, Gerardo  and  Meza-Ruiz, Ivan},
  title     = {Challenges of language technologies for the indigenous languages of the {Americas}},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {55--69},
  abstract  = {Indigenous languages of the American continent are highly diverse. However, they have received little attention from the technological perspective. In this paper, we review the research, the digital resources and the available NLP systems that focus on these languages. We present the main challenges and research questions that arise when distant languages and low-resource scenarios are faced. We would like to encourage NLP research in linguistically rich and diverse areas like the Americas.},
  url       = {http://www.aclweb.org/anthology/C18-1006}
}

% Em-dashes in the abstract use the ASCII TeX ligature (---) so the entry stays 7-bit clean for classic BibTeX.
@InProceedings{muis-EtAl:2018:C18-1,
  author    = {Muis, Aldrian Obaja  and  Otani, Naoki  and  Vyas, Nidhi  and  Xu, Ruochen  and  Yang, Yiming  and  Mitamura, Teruko  and  Hovy, Eduard},
  title     = {Low-resource Cross-lingual Event Type Detection via Distant Supervision with Minimal Effort},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {70--82},
  abstract  = {The use of machine learning for NLP generally requires resources for training. Tasks performed in a low-resource language usually rely on labeled data in another, typically resource-rich, language. However, there might not be enough labeled data even in a resource-rich language such as English. In such cases, one approach is to use a hand-crafted approach that utilizes only a small bilingual dictionary with minimal manual verification to create distantly supervised data. Another is to explore typical machine learning techniques, for example adversarial training of bilingual word representations. We find that in event-type detection task---the task to classify [parts of] documents into a fixed set of labels---they give about the same performance. We explore ways in which the two methods can be complementary and also see how to best utilize a limited budget for manual annotation to maximize performance gain.},
  url       = {http://www.aclweb.org/anthology/C18-1007}
}

% NOTE(review): the abstract below stops mid-sentence ("... a simple set of edit"); it looks truncated -- restore it from the published record.
@inproceedings{makarov-clematide:2018:C18-1,
    author = {Makarov, Peter  and  Clematide, Simon},
    title = {Neural Transition-based String Transduction for Limited-Resource Setting in Morphology},
    booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
    pages = {83--93},
    year = {2018},
    month = {August},
    publisher = {Association for Computational Linguistics},
    address = {Santa Fe, New Mexico, USA},
    url = {http://www.aclweb.org/anthology/C18-1008},
    abstract = {We present a neural transition-based model that uses a simple set of edit},
}

% {Japanese} is braced so that sentence-casing styles keep the capital; the F-score subscript is plain math ($F_1$).
@InProceedings{matsubayashi-inui:2018:C18-1,
  author    = {Matsubayashi, Yuichiroh  and  Inui, Kentaro},
  title     = {Distance-Free Modeling of Multi-Predicate Interactions in End-to-End {Japanese} Predicate-Argument Structure Analysis},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {94--106},
  abstract  = {Capturing interactions among multiple predicate-argument structures (PASs) is a crucial issue in the task of analyzing PAS in Japanese. In this paper, we propose new Japanese PAS analysis models that integrate the label prediction information of arguments in multiple PASs by extending the input and last layers of a standard deep bidirectional recurrent neural network (bi-RNN) model. In these models, using the mechanisms of pooling and attention, we aim to directly capture the potential interactions among multiple PASs, without being disturbed by the word order and distance. Our experiments show that the proposed models improve the prediction accuracy specifically for cases where the predicate and argument are in an indirect dependency relation and achieve a new state of the art in the overall $F_1$ on a standard benchmark corpus.},
  url       = {http://www.aclweb.org/anthology/C18-1009}
}

@inproceedings{rehbein-ruppenhofer:2018:C18-1,
    author = {Rehbein, Ines  and  Ruppenhofer, Josef},
    title = {Sprucing up the trees -- Error detection in treebanks},
    booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
    pages = {107--118},
    year = {2018},
    month = {August},
    publisher = {Association for Computational Linguistics},
    address = {Santa Fe, New Mexico, USA},
    url = {http://www.aclweb.org/anthology/C18-1010},
    abstract = {We present a method for detecting annotation errors in manually and automatically annotated dependency parse trees, based on ensemble parsing in combination with Bayesian inference, guided by active learning. We evaluate our method in different scenarios: (i) for error detection in dependency treebanks and (ii) for improving parsing accuracy on in- and out-of-domain data.},
}

% Percent signs in the abstract are escaped (\%) so the text does not start a TeX comment if it is ever typeset.
@InProceedings{teng-zhang:2018:C18-1,
  author    = {Teng, Zhiyang  and  Zhang, Yue},
  title     = {Two Local Models for Neural Constituent Parsing},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {119--132},
  abstract  = {Non-local features have been exploited by syntactic parsers for capturing dependencies between sub output structures. Such features have been a key to the success of state-of-the-art statistical parsers. With the rise of deep learning, however, it has been shown that local output decisions can give highly competitive accuracies, thanks to the power of dense neural input representations that embody global syntactic information. We investigate two conceptually simple local neural models for constituent parsing, which make local decisions to constituent spans and CFG rules, respectively. Consistent with previous findings along the line, our best model gives highly competitive results, achieving the labeled bracketing F1 scores of 92.4\% on PTB and 87.3\% on CTB 5.1.},
  url       = {http://www.aclweb.org/anthology/C18-1011}
}

% {RNN} is braced so that sentence-casing bibliography styles keep the acronym in capitals.
@InProceedings{chowdhury-zamparelli:2018:C18-1,
  author    = {Chowdhury, Shammur Absar  and  Zamparelli, Roberto},
  title     = {{RNN} Simulations of Grammaticality Judgments on Long-distance Dependencies},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {133--144},
  abstract  = {The paper explores the ability of LSTM networks trained on a language modeling task to detect linguistic structures which are ungrammatical due to extraction violations (extra arguments and subject-relative clause island violations), and considers its implications for the debate on language innatism. The results show that the current RNN model can correctly classify (un)grammatical sentences, in certain conditions, but it is sensitive to linguistic processing factors and probably ultimately unable to induce a more abstract notion of grammaticality, at least in the domain we tested.},
  url       = {http://www.aclweb.org/anthology/C18-1012}
}

% The percent sign in the abstract is escaped (\%) so it does not start a TeX comment if the text is typeset.
% NOTE(review): the abstract may be cut short (the source text ended with a stray trailing space) -- confirm against the published abstract.
@InProceedings{eidelman-kornilova-argyle:2018:C18-1,
  author    = {Eidelman, Vladimir  and  Kornilova, Anastassia  and  Argyle, Daniel},
  title     = {How Predictable is Your State? Leveraging Lexical and Contextual Information for Predicting Legislative Floor Action at the State Level},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {145--160},
  abstract  = {Modeling U.S. Congressional legislation and roll-call votes has received significant attention in previous literature, and while legislators across 50 state governments and D.C. propose over 100,000 bills each year, enacting over 30\% of them on average, state level analysis has received relatively less attention due in part to the difficulty in obtaining the necessary data. Since each state legislature is guided by their own procedures, politics and issues, however, it is difficult to qualitatively assess the factors that affect the likelihood of a legislative initiative succeeding.},
  url       = {http://www.aclweb.org/anthology/C18-1013}
}

% NOTE(review): the abstract ends with a trailing space, which may mean the text was cut at a paragraph break -- confirm against the published abstract.
@inproceedings{geva-berant:2018:C18-1,
    author = {Geva, Mor  and  Berant, Jonathan},
    title = {Learning to Search in Long Documents Using Document Structure},
    booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
    pages = {161--176},
    year = {2018},
    month = {August},
    publisher = {Association for Computational Linguistics},
    address = {Santa Fe, New Mexico, USA},
    url = {http://www.aclweb.org/anthology/C18-1014},
    abstract = {Reading comprehension models are based on recurrent neural networks that sequentially process the document tokens. As interest turns to answering more complex questions over longer documents, sequential reading of large portions of text becomes a substantial bottleneck. },
}

@inproceedings{hong-EtAl:2018:C18-1,
    author = {Hong, Yu  and  Xu, Yang  and  Ruan, Huibin  and  Zou, Bowei  and  Yao, Jianmin  and  Zhou, Guodong},
    title = {Incorporating Image Matching Into Knowledge Acquisition for Event-Oriented Relation Recognition},
    booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
    pages = {177--189},
    year = {2018},
    month = {August},
    publisher = {Association for Computational Linguistics},
    address = {Santa Fe, New Mexico, USA},
    url = {http://www.aclweb.org/anthology/C18-1015},
    abstract = {Event relation recognition is a challenging language processing task. It is required to determine the relation class of a pair of query events, such as causality, under the condition that there isn't any reliable clue for use. We follow the traditional statistical approach in this paper, speculating the relation class of the target events based on the relation-class distributions on the similar events. There is minimal supervision used during the speculation process. In particular, we incorporate image processing into the acquisition of similar event instances, including the utilization of images for visually representing event scenes, and the use of the neural network based image matching for approximate calculation between events. We test our method on the ACE-R2 corpus and compared our model with the fully-supervised neural network models. Experimental results show that we achieve a comparable performance to CNN while slightly better than LSTM.},
}

@inproceedings{yamada-shindo-takefuji:2018:C18-1,
    author = {Yamada, Ikuya  and  Shindo, Hiroyuki  and  Takefuji, Yoshiyasu},
    title = {Representation Learning of Entities and Documents from Knowledge Base Descriptions},
    booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
    pages = {190--201},
    year = {2018},
    month = {August},
    publisher = {Association for Computational Linguistics},
    address = {Santa Fe, New Mexico, USA},
    url = {http://www.aclweb.org/anthology/C18-1016},
    abstract = {In this paper, we describe TextEnt, a neural network model that learns distributed representations of entities and documents directly from a knowledge base (KB). Given a document in a KB consisting of words and entity annotations, we train our model to predict the entity that the document describes and map the document and its target entity close to each other in a continuous vector space. Our model is trained using a large number of documents extracted from Wikipedia. The performance of the proposed model is evaluated using two tasks, namely fine-grained entity typing and multiclass text classification. The results demonstrate that our model achieves state-of-the-art performance on both tasks. The code and the trained representations are made available online for further academic research.},
}

@inproceedings{kulkarni-EtAl:2018:C18-1,
    author = {Kulkarni, Vivek  and  Tian, Yingtao  and  Dandiwala, Parth  and  Skiena, Steve},
    title = {Simple Neologism Based Domain Independent Models to Predict Year of Authorship},
    booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
    pages = {202--212},
    year = {2018},
    month = {August},
    publisher = {Association for Computational Linguistics},
    address = {Santa Fe, New Mexico, USA},
    url = {http://www.aclweb.org/anthology/C18-1017},
    abstract = {We present domain independent models to date documents based only on neologism usage patterns. Our models capture patterns of neologism usage over time to date texts, provide insights into temporal locality of word usage over a span of 150 years, and generalize to various domains like News, Fiction, and Non-Fiction with competitive performance. Quite intriguingly, we show that by modeling only the distribution of usage counts over neologisms (the model being agnostic of the particular words themselves), we achieve competitive performance using several orders of magnitude fewer features (only 200 input features) compared to state of the art models some of which use 200K features.},
}

@inproceedings{huang-EtAl:2018:C18-1,
    author = {Huang, Danqing  and  Liu, Jing  and  Lin, Chin-Yew  and  Yin, Jian},
    title = {Neural Math Word Problem Solver with Reinforcement Learning},
    booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
    pages = {213--223},
    year = {2018},
    month = {August},
    publisher = {Association for Computational Linguistics},
    address = {Santa Fe, New Mexico, USA},
    url = {http://www.aclweb.org/anthology/C18-1018},
    abstract = {Sequence-to-sequence model has been applied to solve math word problems. The model takes math problem descriptions as input and generates equations as output. The advantage of sequence-to-sequence model requires no feature engineering and can generate equations that do not exist in training data. However, our experimental analysis reveals that this model suffers from two shortcomings: (1) generate spurious numbers; (2) generate numbers at wrong positions. In this paper, we propose incorporating copy and alignment mechanism to the sequence-to-sequence model (namely CASS) to address these shortcomings. To train our model, we apply reinforcement learning to directly optimize the solution accuracy. It overcomes the ``train-test discrepancy'' issue of maximum likelihood estimation, which uses the surrogate objective of maximizing equation likelihood during training while the evaluation metric is solution accuracy (non-differentiable) at test time. Furthermore, to explore the effectiveness of our neural model, we use our model output as a feature and incorporate it into the feature-based model. Experimental results show that (1) The copy and alignment mechanism is effective to address the two issues; (2) Reinforcement learning leads to better performance than maximum likelihood on this task; (3) Our neural model is complementary to the feature-based model and their combination significantly outperforms the state-of-the-art results.},
}

@inproceedings{lee-yeung:2018:C18-1,
    author = {Lee, John  and  Yeung, Chak Yan},
    title = {Personalizing Lexical Simplification},
    booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
    pages = {224--232},
    year = {2018},
    month = {August},
    publisher = {Association for Computational Linguistics},
    address = {Santa Fe, New Mexico, USA},
    url = {http://www.aclweb.org/anthology/C18-1019},
    abstract = {A lexical simplification (LS) system aims to substitute complex words with simple words in a text, while preserving its meaning and grammaticality. Despite individual users' differences in vocabulary knowledge, current systems do not consider these variations; rather, they are trained to find one optimal substitution or ranked list of substitutions for all users. We evaluate the performance of a state-of-the-art LS system on individual learners of English at different proficiency levels, and measure the benefits of using complex word identification (CWI) models to personalize the system. Experimental results show that even a simple personalized CWI model, based on graded vocabulary lists, can help the system avoid some unnecessary simplifications and produce more readable output.},
}

@inproceedings{kuznetsov-gurevych:2018:C18-1,
    author = {Kuznetsov, Ilia  and  Gurevych, Iryna},
    title = {From Text to Lexicon: Bridging the Gap between Word Embeddings and Lexical Resources},
    booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
    pages = {233--244},
    year = {2018},
    month = {August},
    publisher = {Association for Computational Linguistics},
    address = {Santa Fe, New Mexico, USA},
    url = {http://www.aclweb.org/anthology/C18-1020},
    abstract = {Distributional word representations (often referred to as word embeddings) are omnipresent in modern NLP. Early work has focused on building representations for word types, and recent studies show that lemmatization and part of speech (POS) disambiguation of targets in isolation improve the performance of word embeddings on a range of downstream tasks. However, the reasons behind these improvements, the qualitative effects of these operations and the combined performance of lemmatized and POS disambiguated targets are less studied. This work aims to close this gap and puts previous findings into a general perspective. We examine the effect of lemmatization and POS typing on word embedding performance in a novel resource-based evaluation scenario, as well as on standard similarity benchmarks. We show that these two operations have complimentary qualitative and vocabulary-level effects and are best used in combination. We find that the improvement is more pronounced for verbs and show how lemmatization and POS typing implicitly target some of the verb-specific issues. We claim that the observed improvement is a result of better conceptual alignment between word embeddings and lexical resources, stressing the need for conceptually plausible modeling of word embedding targets.},
}

% The surname uses the BibTeX special-character form {\o} so classic 8-bit BibTeX parses and sorts it correctly.
@InProceedings{bingel-paetzold-sgaard:2018:C18-1,
  author    = {Bingel, Joachim  and  Paetzold, Gustavo  and  S{\o}gaard, Anders},
  title     = {Lexi: A tool for adaptive, personalized text simplification},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {245--258},
  abstract  = {Most previous research in text simplification has aimed to develop generic solutions, assuming very homogeneous target audiences with consistent intra-group simplification needs. We argue that this assumption does not hold, and that instead we need to develop simplification systems that adapt to the individual needs of specific users. As a first step towards personalized simplification, we propose a framework for adaptive lexical simplification and introduce Lexi, a free open-source and easily extensible tool for adaptive, personalized text simplification. Lexi is easily installed as a browser extension, enabling easy access to the service for its users.},
  url       = {http://www.aclweb.org/anthology/C18-1021}
}

@inproceedings{jiang-EtAl:2018:C18-11,
    author = {Jiang, Shenhao  and  Prasad, Animesh  and  Kan, Min-Yen  and  Sugiyama, Kazunari},
    title = {Identifying Emergent Research Trends by Key Authors and Phrases},
    booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
    pages = {259--269},
    year = {2018},
    month = {August},
    publisher = {Association for Computational Linguistics},
    address = {Santa Fe, New Mexico, USA},
    url = {http://www.aclweb.org/anthology/C18-1022},
    abstract = {Identifying emergent research trends is a key issue for both primary researchers as well as secondary research managers. Such processes can uncover the historical development of an area, and yield insight on developing topics. We propose an embedded trend detection framework for this task which incorporates our bijunctive hypothesis that important phrases are written by important authors within a field and vice versa. By ranking both author and phrase information in a multigraph, our method jointly determines key phrases and authoritative authors. We represent this intermediate output as phrasal embeddings, and feed this to a recurrent neural network (RNN) to compute trend scores that identify research trends. Over two large datasets of scientific articles, we demonstrate that our approach successfully detects past trends from the field, outperforming baselines based solely on text centrality or citation.},
}

% {WordNet} is braced so that sentence-casing bibliography styles keep its internal capitalisation.
@InProceedings{lan-jiang:2018:C18-1,
  author    = {Lan, Yunshi  and  Jiang, Jing},
  title     = {Embedding {WordNet} Knowledge for Textual Entailment},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {270--281},
  abstract  = {In this paper, we study how we can improve a deep learning approach to textual entailment by incorporating lexical entailment relations from WordNet. Our idea is to embed the lexical entailment knowledge contained in WordNet in specially-learned word vectors, which we call ``entailment vectors.'' We present a standard neural network model and a novel set-theoretic model to learn these entailment vectors from word pairs with known lexical entailment relations derived from WordNet. We further incorporate these entailment vectors into a decomposable attention model for textual entailment and evaluate the model on the SICK and the SNLI dataset. We find that using these special entailment word vectors, we can significantly improve the performance of textual entailment compared with a baseline that uses only standard word2vec vectors. The final performance of our model is close to or above the state of the art, but our method does not rely on any manually-crafted rules or extensive syntactic features.},
  url       = {http://www.aclweb.org/anthology/C18-1023}
}

% Percent signs in the abstract are escaped (\%) so the text does not start a TeX comment if it is ever typeset.
@InProceedings{jin-EtAl:2018:C18-1,
  author    = {Jin, Hailong  and  Hou, Lei  and  Li, Juanzi  and  Dong, Tiansi},
  title     = {Attributed and Predictive Entity Embedding for Fine-Grained Entity Typing in Knowledge Bases},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {282--292},
  abstract  = {Fine-grained entity typing aims at identifying the semantic type of an entity in KB. Type information is very important in knowledge bases, but are unfortunately incomplete even in some large knowledge bases. Limitations of existing methods are either ignoring the structure and type information in KB or requiring large scale annotated corpus. To address these issues, we propose an attributed and predictive entity embedding method, which can fully utilize various kinds of information comprehensively. Extensive experiments on two real DBpedia datasets show that our proposed method significantly outperforms 8 state-of-the-art methods, with 4.0\% and 5.2\% improvement in Mi-F1 and Ma-F1, respectively.},
  url       = {http://www.aclweb.org/anthology/C18-1024}
}

@inproceedings{li-cheng-jia:2018:C18-1,
  author = {Li, Bo and Cheng, Ping and Jia, Le},
  title = {Joint Learning from Labeled and Unlabeled Data for Information Retrieval},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  publisher = {Association for Computational Linguistics},
  address = {Santa Fe, New Mexico, USA},
  year = 2018,
  month = {August},
  pages = {293--302},
  url = {http://www.aclweb.org/anthology/C18-1025},
  abstract = {Recently, a significant number of studies have focused on neural information retrieval (IR) models. One category of works use unlabeled data to train general word embeddings based on term proximity, which can be integrated into traditional IR models. The other category employs labeled data (e.g. click-through data) to train end-to-end neural IR models consisting of layers for target-specific representation learning. The latter idea accounts better for the IR task and is favored by recent research works, which is the one we will follow in this paper. We hypothesize that general semantics learned from unlabeled data can complement task-specific representation learned from labeled data of limited quality, and that a combination of the two is favorable. To this end, we propose a learning framework which can benefit from both labeled and more abundant unlabeled data for representation learning in the context of IR. Through a joint learning fashion in a single neural framework, the learned representation is optimized to minimize both the supervised loss on query-document matching and the unsupervised loss on text reconstruction. Standard retrieval experiments on TREC collections indicate that the joint learning methodology leads to significant better performance of retrieval over several strong baselines for IR.},
}

@InProceedings{wei-meurers:2018:C18-1,
  author    = {Wei{\ss}, Zarah  and  Meurers, Detmar},
  title     = {Modeling the Readability of {German} Targeting Adults and Children: An empirically broad analysis and its cross-corpus validation},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {303--317},
  abstract  = {We analyze two novel data sets of German educational media texts},
  url       = {http://www.aclweb.org/anthology/C18-1026}
}

@InProceedings{tajner-hulpus:2018:C18-1,
  author    = {{\v{S}}tajner, Sanja  and  Hulpus, Ioana},
  title     = {Automatic Assessment of Conceptual Text Complexity Using Knowledge Graphs},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {318--330},
  abstract  = {Complexity of texts is usually assessed only at the lexical and syntactic levels. Although it is known that conceptual complexity plays a significant role in text understanding, no attempts have been made at assessing it automatically. We propose to automatically estimate the conceptual complexity of texts by exploiting a number of graph-based measures on a large knowledge base. By using a high-quality language learners corpus for English, we show that graph-based measures of individual text concepts, as well as the way they relate to each other in the knowledge graph, have a high discriminative power when distinguishing between two versions of the same text.},
  url       = {http://www.aclweb.org/anthology/C18-1027}
}

@InProceedings{yimam-biemann:2018:C18-1,
  author    = {Yimam, Seid Muhie  and  Biemann, Chris},
  title     = {{Par4Sim} -- Adaptive Paraphrasing for Text Simplification},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {331--342},
  abstract  = {Learning from a real-world data stream and continuously updating the model without explicit supervision is a new challenge for NLP applications with machine learning components. In this work, we have developed an adaptive learning system for text simplification, which improves the underlying learning-to-rank model from usage data, i.e. how users have employed the system for the task of simplification. Our experimental result shows that, over a period of time, the performance of the embedded paraphrase ranking model increases steadily improving from a score of 62.88\% up to 75.70\% based on the NDCG$@$10 evaluation metrics. To our knowledge, this is the first study where an NLP component is adaptively improved through usage.},
  url       = {http://www.aclweb.org/anthology/C18-1028}
}

@inproceedings{sari-stevenson-vlachos:2018:C18-1,
  author = {Sari, Yunita and Stevenson, Mark and Vlachos, Andreas},
  title = {Topic or Style? Exploring the Most Useful Features for Authorship Attribution},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  publisher = {Association for Computational Linguistics},
  address = {Santa Fe, New Mexico, USA},
  year = 2018,
  month = {August},
  pages = {343--353},
  url = {http://www.aclweb.org/anthology/C18-1029},
  abstract = {Approaches to authorship attribution, the task of identifying the author of a document, are based on analysis of individuals' writing style and/or preferred topics. Although the problem has been widely explored, no previous studies have analysed the relationship between dataset characteristics and effectiveness of different types of features. This study carries out an analysis of four widely used datasets to explore how different types of features affect authorship attribution accuracy under varying conditions. The results of the analysis are applied to authorship attribution models based on both discrete and continuous representations. We apply the conclusions from our analysis to an extension of an existing approach to authorship attribution and outperform the prior state-of-the-art on two out of the four datasets used.},
}

@InProceedings{le-EtAl:2018:C18-1,
  author    = {Le, Minh  and  Postma, Marten  and  Urbani, Jacopo  and  Vossen, Piek},
  title     = {A Deep Dive into Word Sense Disambiguation with {LSTM}},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {354--365},
  abstract  = {LSTM-based language models have been shown effective in Word Sense Disambiguation (WSD). In particular, the technique proposed by Yuan et al. (2016) returned state-of-the-art performance in several benchmarks, but neither the training data nor the source code was released. This paper presents the results of a reproduction study and analysis of this technique using only openly available datasets (GigaWord, SemCor, OMSTI) and software (TensorFlow). Our study showed that similar results can be obtained with much less data than hinted at by Yuan et al. (2016). Detailed analyses shed light on the strengths and weaknesses of this method. First, adding more unannotated training data is useful, but is subject to diminishing returns. Second, the model can correctly identify both popular and unpopular meanings. Finally, the limited sense coverage in the annotated datasets is a major limitation. All code and trained models are made freely available.},
  url       = {http://www.aclweb.org/anthology/C18-1030}
}

@inproceedings{jiang-EtAl:2018:C18-12,
  author = {Jiang, Zhiwei and Gu, Qing and Yin, Yafeng and Chen, Daoxu},
  title = {Enriching Word Embeddings with Domain Knowledge for Readability Assessment},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  publisher = {Association for Computational Linguistics},
  address = {Santa Fe, New Mexico, USA},
  year = 2018,
  month = {August},
  pages = {366--378},
  url = {http://www.aclweb.org/anthology/C18-1031},
  abstract = {In this paper, we present a method which learns the word embedding for readability assessment. For the existing word embedding models, they typically focus on the syntactic or semantic relations of words, while ignoring the reading difficulty, thus they may not be suitable for readability assessment. Hence, we provide the knowledge-enriched word embedding (KEWE), which encodes the knowledge on reading difficulty into the representation of words. Specifically, we extract the knowledge on word-level difficulty from three perspectives to construct a knowledge graph, and develop two word embedding models to incorporate the difficulty context derived from the knowledge graph to define the loss functions. Experiments are designed to apply KEWE for readability assessment on both English and Chinese datasets, and the results demonstrate both effectiveness and potential of KEWE.},
}

@InProceedings{jana-EtAl:2018:C18-1,
  author    = {Jana, Abhik  and  Kanojiya, Pranjal  and  Goyal, Pawan  and  Mukherjee, Animesh},
  title     = {{WikiRef}: Wikilinks as a route to recommending appropriate references for scientific {Wikipedia} pages},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {379--389},
  abstract  = {The exponential increase in the usage of Wikipedia as a key source of scientific knowledge among the researchers is making it absolutely necessary to metamorphose this knowledge repository into an integral and self-contained source of information for direct utilization. Unfortunately, the references which support the content of each Wikipedia entity page, are far from complete. Why are the reference section ill-formed for most Wikipedia pages? Is this section edited as frequently as the other sections of a page? Can there be appropriate surrogates that can automatically enhance the reference section? In this paper, we propose a novel two step approach -- WikiRef -- that (i) leverages the wikilinks present in a scientific Wikipedia target page and, thereby, (ii) recommends highly relevant references to be included in that target page appropriately and automatically borrowed from the reference section of the wikilinks. In the first step, we build a classifier to ascertain whether a wikilink is a potential source of reference or not. In the following step, we recommend references to the target page from the reference section of the wikilinks that are classified as potential sources of references in the first step. We perform an extensive evaluation of our approach on datasets from two different domains -- Computer Science and Physics. For Computer Science we achieve a notably good performance with a precision$@$1 of 0.44 for reference recommendation as opposed to 0.38 obtained from the most competitive baseline. For the Physics dataset, we obtain a similar performance boost of 10\% with respect to the most competitive baseline.},
  url       = {http://www.aclweb.org/anthology/C18-1032}
}

@InProceedings{alharthi-inkpen-szpakowicz:2018:C18-1,
  author    = {Alharthi, Haifa  and  Inkpen, Diana  and  Szpakowicz, Stan},
  title     = {Authorship Identification for Literary Book Recommendations},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {390--400},
  abstract  = {Book recommender systems can help promote the practice of reading for pleasure, which has been declining in recent years. One factor that influences reading preferences is writing style. We propose a system that recommends books after learning their authors' style. To our knowledge, this is the first work that applies the information learned by an author-identification model to book recommendations. We evaluated the system according to a top-k recommendation scenario. Our system gives better accuracy when compared with many state-of-the-art methods. We also conducted a qualitative analysis by checking if similar books/authors were annotated similarly by experts.},
  url       = {http://www.aclweb.org/anthology/C18-1033}
}

@InProceedings{evaldoleal-sanchesduran-mariaalusio:2018:C18-1,
  author    = {Leal, Sidney Evaldo  and  Duran, Magali Sanches  and  Alu{\'\i}sio, Sandra Maria},
  title     = {A Nontrivial Sentence Corpus for the Task of Sentence Readability Assessment in {Portuguese}},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {401--413},
  abstract  = {Effective textual communication depends on readers being proficient enough to comprehend texts, and texts being clear enough to be understood by the intended audience, in a reading task. When the meaning of textual information and instructions is not well conveyed, many losses and damages may occur. Among the solutions to alleviate this problem is the automatic evaluation of sentence readability, task which has been receiving a lot of attention due to its large applicability. However, a shortage of resources, such as corpora for training and evaluation, hinders the full development of this task.},
  url       = {http://www.aclweb.org/anthology/C18-1034}
}

@inproceedings{du-zong-su:2018:C18-1,
  author = {Du, Qianlong and Zong, Chengqing and Su, Keh-Yih},
  title = {Adopting the Word-Pair-Dependency-Triplets with Individual Comparison for Natural Language Inference},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  publisher = {Association for Computational Linguistics},
  address = {Santa Fe, New Mexico, USA},
  year = 2018,
  month = {August},
  pages = {414--425},
  url = {http://www.aclweb.org/anthology/C18-1035},
  abstract = {This paper proposes to perform natural language inference with Word-Pair-Dependency-Triplets. Most previous DNN-based approaches either ignore syntactic dependency among words, or directly use tree-LSTM to generate sentence representation with irrelevant information. To overcome the problems mentioned above, we adopt Word-Pair-Dependency-Triplets to improve alignment and inference judgment. To be specific, instead of comparing each triplet from one passage with the merged information of another passage, we first propose to perform comparison directly between the triplets of the given passage-pair to make the judgement more interpretable. Experimental results show that the performance of our approach is better than most of the approaches that use tree structures, and is comparable to other state-of-the-art approaches.},
}

@InProceedings{lei-EtAl:2018:C18-1,
  author    = {Lei, Kai  and  Chen, Daoyuan  and  Li, Yaliang  and  Du, Nan  and  Yang, Min  and  Fan, Wei  and  Shen, Ying},
  title     = {Cooperative Denoising for Distantly Supervised Relation Extraction},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {426--436},
  abstract  = {Distantly supervised relation extraction greatly reduces human efforts in extracting relational facts from unstructured texts. However, it suffers from noisy labeling problem, which can degrade its performance. Meanwhile, the useful information expressed in knowledge graph is still underutilized in the state-of-the-art methods for distantly supervised relation extraction. In the light of these challenges, we propose CORD, a novel COopeRative Denoising framework, which consists two base networks leveraging text corpus and knowledge graph respectively, and a cooperative module involving their mutual learning by the adaptive bi-directional knowledge distillation and dynamic ensemble with noisy-varying instances. Experimental results on a real-world dataset demonstrate that the proposed method reduces the noisy labels and achieves substantial improvement over the state-of-the-art methods.},
  url       = {http://www.aclweb.org/anthology/C18-1036}
}

@inproceedings{zou-EtAl:2018:C18-11,
  author = {Zou, Bowei and Xu, Zengzhuang and Hong, Yu and Zhou, Guodong},
  title = {Adversarial Feature Adaptation for Cross-lingual Relation Classification},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  publisher = {Association for Computational Linguistics},
  address = {Santa Fe, New Mexico, USA},
  year = 2018,
  month = {August},
  pages = {437--448},
  url = {http://www.aclweb.org/anthology/C18-1037},
  abstract = {Relation Classification aims to classify the semantic relationship between two marked entities in a given sentence. It plays a vital role in a variety of natural language processing applications. Most existing methods focus on exploiting mono-lingual data, e.g., in English, due to the lack of annotated data in other languages. In this paper, we come up with a feature adaptation approach for cross-lingual relation classification, which employs a generative adversarial network (GAN) to transfer feature representations from one language with rich annotated data to another language},
}

@InProceedings{zhang-zhao:2018:C18-1,
  author    = {Zhang, Zhuosheng  and  Zhao, Hai},
  title     = {One-shot Learning for Question-Answering in {Gaokao} History Challenge},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {449--461},
  abstract  = {Answering questions from university admission exams (Gaokao in Chinese) is a challenging AI task since it requires effective representation to capture complicated semantic relations between questions and answers. In this work, we propose a hybrid neural model for deep question-answering task from history examinations. Our model employs a cooperative gated neural network to retrieve answers with the assistance of extra labels given by a neural turing machine labeler. Empirical study shows that the labeler works well with only a small training dataset and the gated mechanism is good at fetching the semantic representation of lengthy answers. Experiments on question answering demonstrate the proposed model obtains substantial performance gains over various neural model baselines in terms of multiple evaluation metrics.},
  url       = {http://www.aclweb.org/anthology/C18-1038}
}

@inproceedings{guo-pasunuru-bansal:2018:C18-1,
  author = {Guo, Han and Pasunuru, Ramakanth and Bansal, Mohit},
  title = {Dynamic Multi-Level Multi-Task Learning for Sentence Simplification},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  publisher = {Association for Computational Linguistics},
  address = {Santa Fe, New Mexico, USA},
  year = 2018,
  month = {August},
  pages = {462--476},
  url = {http://www.aclweb.org/anthology/C18-1039},
  abstract = {Sentence simplification aims to improve readability and understandability, based on several operations such as splitting, deletion, and paraphrasing. However, a valid simplified sentence should also be logically entailed by its input sentence. In this work, we first present a strong pointer-copy mechanism based sequence-to-sequence sentence simplification model, and then improve its entailment and paraphrasing capabilities via multi-task learning with related auxiliary tasks of entailment and paraphrase generation. Moreover, we propose a novel 'multi-level' layered soft sharing approach where each auxiliary task shares different (higher versus lower) level layers of the sentence simplification model, depending on the task's semantic versus lexico-syntactic nature. We also introduce a novel multi-armed bandit based training approach that dynamically learns how to effectively switch across tasks during multi-task learning. Experiments on multiple popular datasets demonstrate that our model outperforms competitive simplification systems in SARI and FKGL automatic metrics, and human evaluation. Further, we present several ablation analyses on alternative layer sharing methods, soft versus hard sharing, dynamic multi-armed bandit sampling approaches, and our model's learned entailment and paraphrasing skills.},
}

@inproceedings{fukunaga-EtAl:2018:C18-1,
  author = {Fukunaga, Shunya and Nishikawa, Hitoshi and Tokunaga, Takenobu and Yokono, Hikaru and Takahashi, Tetsuro},
  title = {Interpretation of Implicit Conditions in Database Search Dialogues},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  publisher = {Association for Computational Linguistics},
  address = {Santa Fe, New Mexico, USA},
  year = 2018,
  month = {August},
  pages = {477--486},
  url = {http://www.aclweb.org/anthology/C18-1040},
  abstract = {Targeting the database search dialogue, we propose to utilise information in the user utterances that do not directly mention the database (DB) field of the backend database system but are useful for constructing database queries. We call this kind of information implicit conditions. Interpreting the implicit conditions enables the dialogue system more natural and efficient in communicating with humans. We formalised the interpretation of the implicit conditions as classifying user utterances into the related DB field while identifying the evidence for that classification at the same time. Introducing this new task is one of the contributions of this paper. We implemented two models for this task: an SVM-based model and an RCNN-based model. Through the evaluation using a corpus of simulated dialogues between a real estate agent and a customer, we found that the SVM-based model showed better performance than the RCNN-based model.},
}

@inproceedings{hu-EtAl:2018:C18-1,
  author = {Hu, Zikun and Li, Xiang and Tu, Cunchao and Liu, Zhiyuan and Sun, Maosong},
  title = {Few-Shot Charge Prediction with Discriminative Legal Attributes},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  publisher = {Association for Computational Linguistics},
  address = {Santa Fe, New Mexico, USA},
  year = 2018,
  month = {August},
  pages = {487--498},
  url = {http://www.aclweb.org/anthology/C18-1041},
  abstract = {Automatic charge prediction aims to predict the final charges according to the fact descriptions in criminal cases and plays a crucial role in legal assistant systems. Existing works on charge prediction perform adequately on those high-frequency charges but are not yet capable of predicting few-shot charges with limited cases. Moreover, these exist many confusing charge pairs, whose fact descriptions are fairly similar to each other. To address these issues, we introduce several discriminative attributes of charges as the internal mapping between fact descriptions and charges. These attributes provide additional information for few-shot charges, as well as effective signals for distinguishing confusing charges. More specifically, we propose an attribute-attentive charge prediction model to infer the attributes and charges simultaneously. Experimental results on real-work datasets demonstrate that our proposed model achieves significant and consistent improvements than other state-of-the-art baselines. Specifically, our model outperforms other baselines by more than $50\%$ in the few-shot scenario. Our codes and datasets can be obtained from \url{https://github.com/thunlp/attribute\_charge}.},
}

@inproceedings{gupta-EtAl:2018:C18-11,
  author = {Gupta, Deepak and Pujari, Rajkumar and Ekbal, Asif and Bhattacharyya, Pushpak and Maitra, Anutosh and Jain, Tom and Sengupta, Shubhashis},
  title = {Can Taxonomy Help? Improving Semantic Question Matching using Question Taxonomy},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  publisher = {Association for Computational Linguistics},
  address = {Santa Fe, New Mexico, USA},
  year = 2018,
  month = {August},
  pages = {499--513},
  url = {http://www.aclweb.org/anthology/C18-1042},
  abstract = {In this paper, we propose a hybrid technique for semantic question matching. It uses a proposed two-layered taxonomy for English questions by augmenting state-of-the-art deep learning models with question classes obtained from a deep learning based question classifier. Experiments performed on three open-domain datasets demonstrate the effectiveness of our proposed approach. We achieve state-of-the-art results on partial ordering question ranking (POQR) benchmark dataset. Our empirical analysis shows that coupling standard distributional features (provided by the question encoder) with knowledge from taxonomy is more effective than either deep learning or taxonomy-based knowledge alone.},
}

@inproceedings{hosu-EtAl:2018:C18-1,
  author = {Hosu, Ionel Alexandru and Iacob, Radu Cristian Alexandru and Brad, Florin and Ruseti, Stefan and Rebedea, Traian},
  title = {Natural Language Interface for Databases Using a Dual-Encoder Model},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  publisher = {Association for Computational Linguistics},
  address = {Santa Fe, New Mexico, USA},
  year = 2018,
  month = {August},
  pages = {514--524},
  url = {http://www.aclweb.org/anthology/C18-1043},
  abstract = {We propose a sketch-based two-step neural model for generating structured queries (SQL) based on a user's request in natural language. The sketch is obtained by using placeholders for specific entities in the SQL query, such as column names, table names, aliases and variables, in a process similar to semantic parsing. The first step is to apply a sequence-to-sequence (SEQ2SEQ) model to determine the most probable SQL sketch based on the request in natural language. Then, a second network designed as a dual-encoder SEQ2SEQ model using both the text query and the previously obtained sketch is employed to generate the final SQL query. Our approach shows improvements over previous approaches on two recent large datasets (WikiSQL and SENLIDB) suitable for data-driven solutions for natural language interfaces for databases.},
}

@InProceedings{xu-EtAl:2018:C18-12,
  author    = {Xu, Sheng  and  Li, Peifeng  and  Zhou, Guodong  and  Zhu, Qiaoming},
  title     = {Employing Text Matching Network to Recognise Nuclearity in {Chinese} Discourse},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {525--535},
  abstract  = {The task of nuclearity recognition in Chinese discourse remains challenging due to the demand for more deep semantic information. In this paper, we propose a novel text matching network (TMN) that encodes the discourse units and the paragraphs by combining Bi-LSTM and CNN to capture both global dependency information and local n-gram information. Moreover, it introduces three components of text matching, the Cosine, Bilinear and Single Layer Network, to incorporate various similarities and interactions among the discourse units. Experimental results on the Chinese Discourse TreeBank show that our proposed TMN model significantly outperforms various strong baselines in both micro-F1 and macro-F1.},
  url       = {http://www.aclweb.org/anthology/C18-1044}
}

@InProceedings{chu-EtAl:2018:C18-1,
  author    = {Chu, Xiaomin  and  Jiang, Feng  and  Zhou, Yi  and  Zhou, Guodong  and  Zhu, Qiaoming},
  title     = {Joint Modeling of Structure Identification and Nuclearity Recognition in {Macro Chinese Discourse Treebank}},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {536--546},
  abstract  = {Discourse parsing is a challenging task and plays a critical role in discourse analysis. This paper focus on the macro level discourse structure analysis, which has been less studied in the previous researches. We explore a macro discourse structure presentation schema to present the macro level discourse structure, and propose a corresponding corpus, named Macro Chinese Discourse Treebank. On these bases, we concentrate on two tasks of macro discourse structure analysis, including structure identification and nuclearity recognition. In order to reduce the error transmission between the associated tasks, we adopt a joint model of the two tasks, and an Integer Linear Programming approach is proposed to achieve global optimization with various kinds of constraints.},
  url       = {http://www.aclweb.org/anthology/C18-1045}
}

@inproceedings{guo-EtAl:2018:C18-1,
  author = {Guo, Fengyu and He, Ruifang and Jin, Di and Dang, Jianwu and Wang, Longbiao and Li, Xiangang},
  title = {Implicit Discourse Relation Recognition using Neural Tensor Network with Interactive Attention and Sparse Learning},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  publisher = {Association for Computational Linguistics},
  address = {Santa Fe, New Mexico, USA},
  year = 2018,
  month = {August},
  pages = {547--558},
  url = {http://www.aclweb.org/anthology/C18-1046},
  abstract = {Implicit discourse relation recognition aims to understand and annotate the latent relations between two discourse arguments, such as temporal, comparison, etc. Most previous methods encode two discourse arguments separately, the ones considering pair specific clues ignore the bidirectional interactions between two arguments and the sparsity of pair patterns. In this paper, we propose a novel neural Tensor network framework with Interactive Attention and Sparse Learning (TIASL) for implicit discourse relation recognition. (1) We mine the most correlated word pairs from two discourse arguments to model pair specific clues, and integrate them as interactive attention into argument representations produced by the bidirectional long short-term},
}

@InProceedings{yu-zhang-fu:2018:C18-1,
  author    = {Yu, Nan  and  Zhang, Meishan  and  Fu, Guohong},
  title     = {Transition-based Neural {RST} Parsing with Implicit Syntax Features},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {559--570},
  abstract  = {Syntax has been a useful source of information for statistical RST discourse parsing. Under the neural setting, a common approach integrates syntax by a recursive neural network (RNN), requiring discrete output trees produced by a supervised syntax parser. In this paper, we propose an implicit syntax feature extraction approach, using hidden-layer vectors extracted from a neural syntax parser. In addition, we propose a simple transition-based model as the baseline, further enhancing it with dynamic oracle. Experiments on the standard dataset show that our baseline model with dynamic oracle is highly competitive. When implicit syntax features are integrated, we are able to obtain further improvements, better than using explicit Tree-RNN.},
  url       = {http://www.aclweb.org/anthology/C18-1047}
}

@InProceedings{bai-zhao:2018:C18-1,
  author    = {Bai, Hongxiao  and  Zhao, Hai},
  title     = {Deep Enhanced Representation for Implicit Discourse Relation Recognition},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {571--583},
  abstract  = {Implicit discourse relation recognition is a challenging task as the relation prediction without explicit connectives in discourse parsing needs understanding of text spans and cannot be easily derived from surface features from the input sentence pairs. Thus, properly representing the text is very crucial to this task. In this paper, we propose a model augmented with different grained text representations, including character, subword, word, sentence, and sentence pair levels. The proposed deeper model is evaluated on the benchmark treebank and achieves state-of-the-art accuracy with greater than 48\% in 11-way and F1 score greater than 50\% in 4-way classifications for the first time according to our best knowledge.},
  url       = {http://www.aclweb.org/anthology/C18-1048}
}

@InProceedings{kishimoto-murawaki-kurohashi:2018:C18-1,
  author = {Kishimoto, Yudai  and  Murawaki, Yugo  and  Kurohashi, Sadao},
  title = {A Knowledge-Augmented Neural Network Model for Implicit Discourse Relation Classification},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  year = {2018},
  month = {August},
  pages = {584--595},
  address = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  url = {http://www.aclweb.org/anthology/C18-1049},
  abstract = {Identifying discourse relations that are not overtly marked with discourse connectives remains a challenging problem. The absence of explicit clues indicates a need for the combination of world knowledge and weak contextual clues, which can hardly be learned from a small amount of manually annotated data. In this paper, we address this problem by augmenting the input text with external knowledge and context and by adopting a neural network model that can effectively handle the augmented text. Experiments show that external knowledge did improve the classification accuracy. Contextual information provided no significant gain for implicit discourse relations, but it did for explicit ones.},
}

@InProceedings{kuang-EtAl:2018:C18-1,
  author = {Kuang, Shaohui  and  Xiong, Deyi  and  Luo, Weihua  and  Zhou, Guodong},
  title = {Modeling Coherence for Neural Machine Translation with Dynamic and Topic Caches},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  year = {2018},
  month = {August},
  pages = {596--606},
  address = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  url = {http://www.aclweb.org/anthology/C18-1050},
  abstract = {Sentences in a well-formed text are connected to each other via various links to form the cohesive structure of the text. Current neural machine translation (NMT) systems translate a text in a conventional sentence-by-sentence fashion, ignoring such cross-sentence links and dependencies. This may lead to generate an incoherent target text for a coherent source text. In order to handle this issue, we propose a cache-based approach to modeling coherence for neural machine translation by capturing contextual information either from recently translated sentences or the entire document. Particularly, we explore two types of caches: a dynamic cache, which stores words from the best translation hypotheses of preceding sentences, and a topic cache, which maintains a set of target-side topical words that are semantically related to the document to be translated. On this basis, we build a new layer to score target words in these two caches with a cache-based neural model. Here the estimated probabilities from the cache-based neural model are combined with NMT probabilities into the final word prediction probabilities via a gating mechanism. Finally, the proposed cache-based neural model is trained jointly with NMT system in an end-to-end manner. Experiments and analysis presented in this paper demonstrate that the proposed cache-based model achieves substantial improvements over several state-of-the-art SMT and NMT baselines.},
}

@InProceedings{kuang-xiong:2018:C18-1,
  author = {Kuang, Shaohui  and  Xiong, Deyi},
  title = {Fusing Recency into Neural Machine Translation with an Inter-Sentence Gate Model},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  year = {2018},
  month = {August},
  pages = {607--617},
  address = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  url = {http://www.aclweb.org/anthology/C18-1051},
  abstract = {Neural machine translation (NMT) systems are usually trained on a large amount of bilingual sentence pairs and translate one sentence at a time, ignoring inter-sentence information. This may make the translation of a sentence ambiguous or even inconsistent with the translations of neighboring sentences. In order to handle this issue, we propose an inter-sentence gate model that uses the same encoder to encode two adjacent sentences and controls the amount of information flowing from the preceding sentence to the translation of the current sentence with an inter-sentence gate. In this way, our proposed model can capture the connection between sentences and fuse recency from neighboring sentences into neural machine translation. On several NIST Chinese-English translation tasks, our experiments demonstrate that the proposed inter-sentence gate model achieves substantial improvements over the baseline.},
}

@InProceedings{morishita-suzuki-nagata:2018:C18-1,
  author = {Morishita, Makoto  and  Suzuki, Jun  and  Nagata, Masaaki},
  title = {Improving Neural Machine Translation by Incorporating Hierarchical Subword Features},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  year = {2018},
  month = {August},
  pages = {618--629},
  address = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  url = {http://www.aclweb.org/anthology/C18-1052},
  abstract = {This paper focuses on subword-based Neural Machine Translation (NMT).},
}

@InProceedings{merhav-ash:2018:C18-1,
  author = {Merhav, Yuval  and  Ash, Stephen},
  title = {Design Challenges in Named Entity Transliteration},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  year = {2018},
  month = {August},
  pages = {630--640},
  address = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  url = {http://www.aclweb.org/anthology/C18-1053},
  abstract = {We analyze some of the fundamental design challenges that impact the development of a multilingual state-of-the-art named entity transliteration system, including curating bilingual named entity datasets and evaluation of multiple transliteration methods. We empirically evaluate the transliteration task using the traditional weighted finite state transducer (WFST) approach against two neural approaches: the encoder-decoder recurrent neural network method and the recent, non-sequential Transformer method. In order to improve availability of bilingual named entity transliteration datasets, we release personal name bilingual dictionaries mined from Wikidata for English to Russian, Hebrew, Arabic, and Japanese Katakana. Our code and dictionaries are publicly available.},
}

@InProceedings{lakew-cettolo-federico:2018:C18-1,
  author    = {Lakew, Surafel Melaku  and  Cettolo, Mauro  and  Federico, Marcello},
  title     = {A Comparison of {Transformer} and Recurrent Neural Networks on Multilingual Neural Machine Translation},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {641--652},
  abstract  = {Recently, neural machine translation (NMT) has been extended to multilinguality, that is to handle more than one translation direction with a single system. Multilingual NMT showed competitive performance against pure bilingual systems. Notably, in low-resource settings, it proved to work effectively and efficiently, thanks to shared representation space that is forced across languages and induces a sort of transfer-learning. Furthermore, multilingual NMT enables so-called zero-shot inference across language pairs never seen at training time. Despite the increasing interest},
  url       = {http://www.aclweb.org/anthology/C18-1054}
}

@InProceedings{ebrahimi-lowd-dou:2018:C18-1,
  author = {Ebrahimi, Javid  and  Lowd, Daniel  and  Dou, Dejing},
  title = {On Adversarial Examples for Character-Level Neural Machine Translation},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  year = {2018},
  month = {August},
  pages = {653--663},
  address = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  url = {http://www.aclweb.org/anthology/C18-1055},
  abstract = {Evaluating on adversarial examples has become a standard procedure to measure robustness of deep learning models. Due to the difficulty of creating white-box adversarial examples for discrete text input, most analyses of the robustness of NLP models have been done through black-box adversarial examples. },
}

@InProceedings{ilievski-vossen-schlobach:2018:C18-1,
  author = {Ilievski, Filip  and  Vossen, Piek  and  Schlobach, Stefan},
  title = {Systematic Study of Long Tail Phenomena in Entity Linking},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  year = {2018},
  month = {August},
  pages = {664--674},
  address = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  url = {http://www.aclweb.org/anthology/C18-1056},
  abstract = {State-of-the-art entity linkers achieve high accuracy scores with probabilistic methods. However, these scores should be considered in relation to the properties of the datasets they are evaluated on. Until now, there has not been a systematic investigation of the properties of entity linking datasets and their impact on system performance. In this paper we report on a series of hypotheses regarding the long tail phenomena in entity linking datasets, their interaction, and their impact on system performance. Our systematic study of these hypotheses shows that evaluation datasets mainly capture head entities and only incidentally cover data from the tail, thus encouraging systems to overfit to popular/frequent and non-ambiguous cases. We find the most difficult cases of entity linking among the infrequent candidates of ambiguous forms. With our findings, we hope to inspire future designs of both entity linking systems and evaluation datasets. To support this goal, we provide a list of recommended actions for better inclusion of tail cases.},
}

@InProceedings{cao-EtAl:2018:C18-1,
  author = {Cao, Yixin  and  Hou, Lei  and  Li, Juanzi  and  Liu, Zhiyuan},
  title = {Neural Collective Entity Linking},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  year = {2018},
  month = {August},
  pages = {675--686},
  address = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  url = {http://www.aclweb.org/anthology/C18-1057},
  abstract = {Entity Linking aims to link entity mentions in texts to knowledge bases, and neural models have achieved recent success in this task. However, most existing methods rely on local contexts to resolve entities independently, which may usually fail due to the data sparsity of local information. To address this issue, we propose a novel neural model for collective entity linking, named as NCEL. NCEL apply Graph Convolutional Network to integrate both local contextual features and global coherence information for entity linking. To improve the computation efficiency, we approximately perform graph convolution on a subgraph of adjacent entity mentions instead of those in the entire text. We further introduce an attention scheme to improve the robustness of NCEL to data noise and train the model on Wikipedia hyperlinks to avoid overfitting and domain bias. In experiments, we evaluate NCEL on five publicly available datasets to verify the linking performance as well as generalization ability. We also conduct an extensive analysis of time complexity, the impact of key modules, and qualitative results, which demonstrate the effectiveness and efficiency of our proposed method.},
}

@InProceedings{bhutani-EtAl:2018:C18-1,
  author = {Bhutani, Nikita  and  Qian, Kun  and  Li, Yunyao  and  Jagadish, H. V.  and  Hernandez, Mauricio  and  Vasa, Mitesh},
  title = {Exploiting Structure in Representation of Named Entities using Active Learning},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  year = {2018},
  month = {August},
  pages = {687--699},
  address = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  url = {http://www.aclweb.org/anthology/C18-1058},
  abstract = {Fundamental to several knowledge-centric applications is the need to identify named entities from their textual mentions. However, entities lack a unique representation and their mentions can differ greatly. These variations arise in complex ways that cannot be captured using textual similarity metrics. However, entities have underlying structures, typically shared by entities of the same entity type, that can help reason over their name variations. Discovering, learning and manipulating these structures typically requires high manual effort in the form of large amounts of labeled training data and handwritten transformation programs. In this work, we propose an active-learning based framework that drastically reduces the labeled data required to learn the structures of entities. We show that programs for mapping entity mentions to their structures can be automatically generated using human-comprehensible labels. Our experiments show that our framework consistently outperforms both handwritten programs and supervised learning models. We also demonstrate the utility of our framework in relation extraction and entity resolution tasks.},
}

@InProceedings{alolimat-EtAl:2018:C18-11,
  author = {Al-Olimat, Hussein  and  Gustafson, Steven  and  Mackay, Jason  and  Thirunarayan, Krishnaprasad  and  Sheth, Amit},
  title = {A Practical Incremental Learning Framework For Sparse Entity Extraction},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  year = {2018},
  month = {August},
  pages = {700--710},
  address = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  url = {http://www.aclweb.org/anthology/C18-1059},
  abstract = {This work addresses challenges arising from extracting entities from textual data, including the high cost of data annotation, model accuracy, selecting appropriate evaluation criteria, and the overall quality of annotation. We present a framework that integrates Entity Set Expansion (ESE) and Active Learning (AL) to reduce the annotation cost of sparse data and provide an online evaluation method as feedback. This incremental and interactive learning framework allows for rapid annotation and subsequent extraction of sparse data while maintaining high accuracy.},
}

@InProceedings{mai-EtAl:2018:C18-1,
  author    = {Mai, Khai  and  Pham, Thai-Hoang  and  Nguyen, Minh Trung  and  Nguyen, Tuan Duc  and  Bollegala, Danushka  and  Sasano, Ryohei  and  Sekine, Satoshi},
  title     = {An Empirical Study on Fine-Grained Named Entity Recognition},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {711--722},
  abstract  = {Named entity recognition (NER) has attracted a substantial amount of research. Recently, several neural network-based models have been proposed and achieved high performance. However, there is little research on fine-grained NER (FG-NER), in which hundreds of named entity categories must be recognized, especially for non-English languages. It is still an open question whether there is a model that is robust across various settings or the proper model varies depending on the language, the number of named entity categories, and the size of training datasets. This paper first presents an empirical comparison of FG-NER models for English and Japanese and demonstrates that LSTM+CNN+CRF (Ma and Hovy, 2016), one of the state-of-the-art methods for English NER, also works well for English FG-NER but does not work well for Japanese, a language that has a large number of character types. To tackle this problem, we propose a method to improve the neural network-based Japanese FG-NER performance by removing the CNN layer and utilizing dictionary and category embeddings. Experiment results show that the proposed method improves Japanese FG-NER F-score from 66.76\% to 75.18\%.},
  url       = {http://www.aclweb.org/anthology/C18-1060}
}

@InProceedings{zhang-EtAl:2018:C18-11,
  author    = {Zhang, Yi  and  Sun, Xu  and  Ma, Shuming  and  Yang, Yang  and  Ren, Xuancheng},
  title     = {Does Higher Order {LSTM} Have Better Accuracy for Segmenting and Labeling Sequence Data?},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {723--733},
  abstract  = {Existing neural models usually predict the tag of the current token independent of the neighboring tags. The popular LSTM-CRF model considers the tag dependencies between every two consecutive tags. However, it is hard for existing neural models to take longer distance dependencies between tags into consideration. The scalability is mainly limited by the complex model structures and the cost of dynamic programming during training. In our work, we first design a new model called ``high order LSTM'' to predict multiple tags for the current token which contains not only the current tag but also the previous several tags. We call the number of tags in one prediction as ``order''. Then we propose a new method called Multi-Order BiLSTM (MO-BiLSTM) which combines low order and high order LSTMs together. MO-BiLSTM keeps the scalability to high order models with a pruning technique. We evaluate MO-BiLSTM on all-phrase chunking and NER datasets. Experiment results show that MO-BiLSTM achieves the state-of-the-art result in chunking and highly competitive results in two NER datasets.},
  url       = {http://www.aclweb.org/anthology/C18-1061}
}

@InProceedings{alsaleh-menai:2018:C18-1,
  author = {Al-Saleh, Asma  and  Menai, Mohamed El Bachir},
  title = {Ant Colony System for Multi-Document Summarization},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  year = {2018},
  month = {August},
  pages = {734--744},
  address = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  url = {http://www.aclweb.org/anthology/C18-1062},
  abstract = {This paper proposes an extractive multi-document summarization approach based on an ant colony system to optimize the information coverage of summary sentences. The implemented system was evaluated on both English and Arabic versions of the corpus of the Text Analysis Conference 2011 MultiLing Pilot by using ROUGE metrics. The evaluation results are promising in comparison to those of the participating systems. Indeed, our system achieved the best scores based on several ROUGE metrics.},
}

@InProceedings{cerisara-EtAl:2018:C18-1,
  author    = {Cerisara, Christophe  and  Jafaritazehjani, Somayeh  and  Oluokun, Adedayo  and  Le, Hoa T.},
  title     = {Multi-task dialog act and sentiment recognition on {Mastodon}},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {745--754},
  abstract  = {Because of license restrictions, it often becomes impossible to strictly reproduce most research results},
  url       = {http://www.aclweb.org/anthology/C18-1063}
}

@InProceedings{rogers-EtAl:2018:C18-1,
  author    = {Rogers, Anna  and  Romanov, Alexey  and  Rumshisky, Anna  and  Volkova, Svitlana  and  Gronas, Mikhail  and  Gribov, Alex},
  title     = {{RuSentiment}: An Enriched Sentiment Analysis Dataset for Social Media in {Russian}},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {755--763},
  abstract  = {This paper presents RuSentiment, a new dataset for sentiment analysis of social media posts in Russian, and a new set of comprehensive annotation guidelines that are extensible to other languages. RuSentiment is currently the largest in its class for Russian, with 31,185 posts annotated with Fleiss' kappa of 0.58 (3 annotations per post). To diversify the dataset, 6,950 posts were pre-selected with an active learning-style strategy. We report baseline classification results, and we also release the best-performing embeddings trained on 3.2B tokens of Russian VKontakte posts.},
  url       = {http://www.aclweb.org/anthology/C18-1064}
}

@InProceedings{goldberger-melamud:2018:C18-1,
  author = {Goldberger, Jacob  and  Melamud, Oren},
  title = {Self-Normalization Properties of Language Modeling},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  year = {2018},
  month = {August},
  pages = {764--773},
  address = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  url = {http://www.aclweb.org/anthology/C18-1065},
  abstract = {Self-normalizing discriminative models approximate the normalized probability of a class without having to compute the partition function. In the context of language modeling, this property is particularly appealing as it may significantly reduce run-times due to large word vocabularies. In this study, we provide a comprehensive investigation of language modeling self-normalization. First, we theoretically analyze the inherent self-normalization properties of Noise Contrastive Estimation (NCE) language models. Then, we compare them empirically to softmax-based approaches, which are self-normalized using explicit regularization, and suggest a hybrid model with compelling properties. Finally, we uncover a surprising negative correlation between self-normalization and perplexity across the board, as well as some regularity in the observed errors, which may potentially be used for improving self-normalization algorithms in the future.},
}

@InProceedings{gu-EtAl:2018:C18-11,
  author = {Gu, Shuqin  and  Zhang, Lipeng  and  Hou, Yuexian  and  Song, Yin},
  title = {A Position-aware Bidirectional Attention Network for Aspect-level Sentiment Analysis},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  year = {2018},
  month = {August},
  pages = {774--784},
  address = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  url = {http://www.aclweb.org/anthology/C18-1066},
  abstract = {Aspect-level sentiment analysis aims to distinguish the sentiment polarity of each specific aspect term in a given sentence. Both industry and academia have realized the importance of the relationship between aspect term and sentence, and made attempts to model the relationship by designing a series of attention models. However, most existing methods usually neglect the fact that the position information is also crucial for identifying the sentiment polarity of the aspect term. When an aspect term occurs in a sentence, its neighboring words should be given more attention than other words with long distance. Therefore, we propose a position-aware bidirectional attention network (PBAN) based on bidirectional GRU.},
}

@InProceedings{kohita-noji-matsumoto:2018:C18-1,
  author    = {Kohita, Ryosuke  and  Noji, Hiroshi  and  Matsumoto, Yuji},
  title     = {Dynamic Feature Selection with Attention in Incremental Parsing},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {785--794},
  abstract  = {One main challenge for incremental transition-based parsers, when future inputs are invisible, is to extract good features from a limited local context. In this work, we present a simple technique to maximally utilize the local features with an attention mechanism, which works as context-dependent dynamic feature selection. Our model learns, for example, which tokens should a parser focus on, to decide the next action. Our multilingual experiment shows its effectiveness across many languages. We also present an experiment with augmented test dataset and demonstrate it helps to understand the model's behavior on locally ambiguous points.},
  url       = {http://www.aclweb.org/anthology/C18-1067}
}

@InProceedings{krishna-EtAl:2018:C18-1,
  author    = {Krishna, Kundan  and  Murhekar, Aniket  and  Sharma, Saumitra  and  Srinivasan, Balaji Vasan},
  title     = {Vocabulary Tailored Summary Generation},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {795--805},
  abstract  = {Neural sequence-to-sequence models have been successfully extended for summary generation. However, existing frameworks generate a single summary for a given input and do not tune the summaries towards any additional constraints/preferences. Such a tunable framework is desirable to account for linguistic preferences of the specific audience who will consume the summary. In this paper, we propose a neural framework to generate summaries constrained to a vocabulary-defined linguistic preferences of a target audience. The proposed method accounts for the generation context by tuning the summary words at the time of generation. Our evaluations indicate that the proposed approach tunes summaries to the target vocabulary while still maintaining a superior summary quality against a state-of-the-art word embedding based lexical substitution algorithm, suggesting the feasibility of the proposed approach. We demonstrate two applications of the proposed approach - to generate understandable summaries with simpler words, and readable summaries with shorter words.},
  url       = {http://www.aclweb.org/anthology/C18-1068}
}

@InProceedings{sun-cheng-qu:2018:C18-1,
  author = {Sun, Yawei  and  Cheng, Gong  and  Qu, Yuzhong},
  title = {Reading Comprehension with Graph-based Temporal-Casual Reasoning},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  year = {2018},
  month = {August},
  pages = {806--817},
  address = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  url = {http://www.aclweb.org/anthology/C18-1069},
  abstract = {Complex questions in reading comprehension tasks require integrating information from multiple sentences. In this work, to answer such questions involving temporal and causal relations, we generate event graphs from text based on dependencies, and rank answers by aligning event graphs. In particular, the alignments are constrained by graph-based reasoning to ensure temporal and causal agreement. Our focused approach self-adaptively complements existing solutions; it is automatically triggered only when applicable. Experiments on RACE and MCTest show that state-of-the-art methods are notably improved by using our approach as an add-on.},
}

@InProceedings{barnes-klinger-schulteimwalde:2018:C18-1,
  author    = {Barnes, Jeremy  and  Klinger, Roman  and  {Schulte im Walde}, Sabine},
  title     = {Projecting Embeddings for Domain Adaption: Joint Modeling of Sentiment Analysis in Diverse Domains},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {818--830},
  abstract  = {Domain adaptation for sentiment analysis is challenging},
  url       = {http://www.aclweb.org/anthology/C18-1070}
}

@InProceedings{eger-EtAl:2018:C18-1,
  author = {Eger, Steffen  and  Daxenberger, Johannes  and  Stab, Christian  and  Gurevych, Iryna},
  title = {Cross-lingual Argumentation Mining: Machine Translation (and a bit of Projection) is All You Need!},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  year = {2018},
  month = {August},
  pages = {831--844},
  address = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  url = {http://www.aclweb.org/anthology/C18-1071},
  abstract = {Argumentation mining (AM) requires the identification of complex discourse structures and has lately been applied with success monolingually. In this work, we show that the existing resources are, however, not adequate for assessing cross-lingual AM, due to their heterogeneity or lack of complexity. We therefore create suitable parallel corpora by (human and machine) translating a popular AM dataset consisting of persuasive student essays into German, French, Spanish, and Chinese. We then compare (i) annotation projection and (ii) bilingual word embeddings based direct transfer strategies for cross-lingual AM, finding that the former performs considerably better and almost eliminates the loss from cross-lingual transfer. Moreover, we find that annotation projection works equally well when using either costly human or cheap machine translations. Our code and data are available at http://github.com/UKPLab/coling2018-xling\_argument\_mining.},
}

@InProceedings{wu-EtAl:2018:C18-1,
  author    = {Wu, Sixing  and  Zhang, Dawei  and  Li, Ying  and  Xie, Xing  and  Wu, Zhonghai},
  title     = {{HL-EncDec}: A Hybrid-Level Encoder-Decoder for Neural Response Generation},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {845--856},
  abstract  = {Recent years have witnessed a surge of interest on response generation for neural conversation systems. Most existing models are implemented by following the Encoder-Decoder framework and operate sentences of conversations at word-level. The word-level model is suffering from the Unknown Words Issue and the Preference Issue, which seriously impact the quality of generated responses, for example, generated responses may become irrelevant or too general (i.e. safe responses). To address these issues, this paper proposes a hybrid-level Encoder-Decoder model (HL-EncDec), which not only utilizes the word-level features but also character-level features. We conduct several experiments to evaluate HL-EncDec on a Chinese corpus, experimental results show our model significantly outperforms other non-word-level models in automatic metrics and human annotations and is able to generate more informative responses. We also conduct experiments with a small-scale English dataset to show the generalization ability.},
  url       = {http://www.aclweb.org/anthology/C18-1072}
}

@inproceedings{wang-EtAl:2018:C18-11,
  author    = {Wang, Liang  and  Li, Sujian  and  Zhao, Wei  and  Shen, Kewei  and  Sun, Meng  and  Jia, Ruoyu  and  Liu, Jingming},
  title     = {Multi-Perspective Context Aggregation for Semi-supervised Cloze-style Reading Comprehension},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  pages     = {857--867},
  year      = 2018,
  month     = {August},
  publisher = {Association for Computational Linguistics},
  address   = {Santa Fe, New Mexico, USA},
  url       = {http://www.aclweb.org/anthology/C18-1073},
  abstract  = {Cloze-style reading comprehension has been a popular task for measuring the progress of natural language understanding in recent years. In this paper, we design a novel multi-perspective framework, which can be seen as the joint training of heterogeneous experts and aggregate context information from different perspectives. Each perspective is modeled by a simple aggregation module. The outputs of multiple aggregation modules are fed into a one-timestep pointer network to get the final answer. At the same time, to tackle the problem of insufficient labeled data, we propose an efficient sampling mechanism to automatically generate more training examples by matching the distribution of candidates between labeled and unlabeled data. We conduct our experiments on a recently released cloze-test dataset CLOTH (Xie et al., 2017), which consists of nearly 100k questions designed by professional teachers. Results show that our method achieves new state-of-the-art performance over previous strong baselines.},
}

@inproceedings{zou-EtAl:2018:C18-12,
  author    = {Zou, Yicheng  and  Gui, Tao  and  Zhang, Qi  and  Huang, Xuanjing},
  title     = {A Lexicon-Based Supervised Attention Model for Neural Sentiment Analysis},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  pages     = {868--877},
  year      = 2018,
  month     = {August},
  publisher = {Association for Computational Linguistics},
  address   = {Santa Fe, New Mexico, USA},
  url       = {http://www.aclweb.org/anthology/C18-1074},
  abstract  = {Attention mechanisms have been leveraged for sentiment classification tasks because not all words have the same importance. However, most existing attention models did not take full advantage of sentiment lexicons, which provide rich sentiment information and play a critical role in sentiment analysis. To achieve the above target, in this work, we propose a novel lexicon-based supervised attention model (LBSA), which allows a recurrent neural network to focus on the sentiment content, thus generating sentiment-informative representations. Compared with general attention models, our model has better interpretability and less noise. Experimental results on three large-scale sentiment classification datasets showed that the proposed method outperforms previous methods.},
}

@inproceedings{araki-mitamura:2018:C18-1,
  author    = {Araki, Jun  and  Mitamura, Teruko},
  title     = {Open-Domain Event Detection using Distant Supervision},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  pages     = {878--891},
  year      = 2018,
  month     = {August},
  publisher = {Association for Computational Linguistics},
  address   = {Santa Fe, New Mexico, USA},
  url       = {http://www.aclweb.org/anthology/C18-1075},
  abstract  = {This paper introduces open-domain event detection, a new event detection paradigm to address issues of prior work on restricted domains and event annotation. The goal is to detect all kinds of events regardless of domains. Given the absence of training data, we propose a distant supervision method that is able to generate high-quality training data. Using a manually annotated event corpus as gold standard, our experiments show that despite no direct supervision, the model outperforms supervised models. This result indicates that the distant supervision enables robust event detection in various domains, while obviating the need for human annotation of events.},
}

% NOTE(review): the abstract below is a single sentence; it may have been
% truncated during export. Verify against the published paper.
@inproceedings{chen-EtAl:2018:C18-11,
  author    = {Chen, Bo  and  An, Bo  and  Sun, Le  and  Han, Xianpei},
  title     = {Semi-Supervised Lexicon Learning for Wide-Coverage Semantic Parsing},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  pages     = {892--904},
  year      = 2018,
  month     = {August},
  publisher = {Association for Computational Linguistics},
  address   = {Santa Fe, New Mexico, USA},
  url       = {http://www.aclweb.org/anthology/C18-1076},
  abstract  = {Semantic parsers critically rely on accurate and high-coverage lexicons.},
}

@inproceedings{shafieibavani-EtAl:2018:C18-1,
  author    = {ShafieiBavani, Elaheh  and  Ebrahimi, Mohammad  and  Wong, Raymond  and  Chen, Fang},
  title     = {Summarization Evaluation in the Absence of Human Model Summaries Using the Compositionality of Word Embeddings},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  pages     = {905--914},
  year      = 2018,
  month     = {August},
  publisher = {Association for Computational Linguistics},
  address   = {Santa Fe, New Mexico, USA},
  url       = {http://www.aclweb.org/anthology/C18-1077},
  abstract  = {We present a new summary evaluation approach that does not require human model summaries. Our approach exploits the compositional capabilities of corpus-based and lexical resource-based word embeddings to develop the features reflecting coverage, diversity, informativeness, and coherence of summaries. The features are then used to train a learning model for predicting the summary content quality in the absence of gold models. We evaluate the proposed metric in replicating the human assigned scores for summarization systems and summaries on data from query-focused and update summarization tasks in TAC 2008 and 2009. The results show that our feature combination provides reliable estimates of summary content quality when model summaries are not available.},
}

% Accented letters in the first author's name are written as BibTeX special
% characters so classic 8-bit BibTeX sorts and labels the name correctly;
% the language name in the title is brace-protected against recasing.
@InProceedings{jimnezzafra-EtAl:2018:C18-1,
  author    = {Jim{\'e}nez-Zafra, Salud Mar{\'\i}a  and  Morante, Roser  and  Martin, Maite  and  Urena Lopez, L. Alfonso},
  title     = {A review of {Spanish} corpora annotated with negation},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {915--924},
  abstract  = {The availability of corpora annotated with negation information is essential to develop negation processing systems in any language. However, there is a lack of these corpora even for languages like English, and when there are corpora available they are small and the annotations are not always compatible across corpora. In this paper we review the existing corpora annotated with negation in Spanish with the purpose of first, gathering the information to make it available for other researchers and, second, analyzing how compatible are the corpora and how has the linguistic phenomenon been addressed. Our final aim is to develop a supervised negation processing system for Spanish, for which we need training and test data. Our analysis shows that it will not be possible to merge the small corpora existing for Spanish due to lack of compatibility in the annotations.},
  url       = {http://www.aclweb.org/anthology/C18-1078}
}

@inproceedings{li-yang-zong:2018:C18-1,
  author    = {Li, Junjie  and  Yang, Haitong  and  Zong, Chengqing},
  title     = {Document-level Multi-aspect Sentiment Classification by Jointly Modeling Users, Aspects, and Overall Ratings},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  pages     = {925--936},
  year      = 2018,
  month     = {August},
  publisher = {Association for Computational Linguistics},
  address   = {Santa Fe, New Mexico, USA},
  url       = {http://www.aclweb.org/anthology/C18-1079},
  abstract  = {Document-level multi-aspect sentiment classification aims to predict user's sentiment polarities for different aspects of a product in a review. Existing approaches mainly focus on text information. However, the authors (i.e. users) and overall ratings of reviews are ignored, both of which are proved to be significant on interpreting the sentiments of different aspects in this paper. Therefore, we propose a model called Hierarchical User Aspect Rating Network (HUARN) to consider user preference and overall ratings jointly. Specifically, HUARN adopts a hierarchical architecture to encode word, sentence, and document level information. Then, user attention and aspect attention are introduced into building sentence and document level representation. The document representation is combined with user and overall rating information to predict aspect ratings of a review. Diverse aspects are treated differently and a multi-task framework is adopted. Empirical results on two real-world datasets show that HUARN achieves state-of-the-art performances.},
}

@inproceedings{hazem-morin:2018:C18-1,
  author    = {Hazem, Amir  and  Morin, Emmanuel},
  title     = {Leveraging Meta-Embeddings for Bilingual Lexicon Extraction from Specialized Comparable Corpora},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  pages     = {937--949},
  year      = 2018,
  month     = {August},
  publisher = {Association for Computational Linguistics},
  address   = {Santa Fe, New Mexico, USA},
  url       = {http://www.aclweb.org/anthology/C18-1080},
  abstract  = {Recent evaluations on bilingual lexicon extraction from specialized comparable corpora have shown contrasted performance while using word embedding models. This can be partially explained by the lack of large specialized comparable corpora to build efficient representations. Within this context, we try to answer the following questions: First, (i) among the state-of-the-art embedding models, whether trained on specialized corpora or pre-trained on large general data sets, which one is the most appropriate model for bilingual terminology extraction? Second (ii) is it worth it to combine multiple embeddings trained on different data sets? For that purpose, we propose the first systematic evaluation of different word embedding models for bilingual terminology extraction from specialized comparable corpora. We emphasize how the character-based embedding model outperforms other models on the quality of the extracted bilingual lexicons. Further more, we propose a new efficient way to combine different embedding models learned from specialized and general-domain data sets. Our approach leads to higher performance than the best individual embedding model.},
}

% Quoted words in the abstract use LaTeX opening and closing quote ligatures;
% straight double quotes typeset as two closing quotes and can clash with
% babel shorthands.
@InProceedings{agrawal-an-papagelis:2018:C18-1,
  author    = {Agrawal, Ameeta  and  An, Aijun  and  Papagelis, Manos},
  title     = {Learning Emotion-enriched Word Representations},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {950--961},
  abstract  = {Most word representation learning methods are based on the distributional hypothesis in linguistics, according to which words that are used and occur in the same contexts tend to possess similar meanings. As a consequence, emotionally dissimilar words, such as ``happy'' and ``sad'' occurring in similar contexts would purport more similar meaning than emotionally similar words, such as ``happy'' and ``joy''. This complication leads to rather undesirable outcome in predictive tasks that relate to affect (emotional state), such as emotion classification and emotion similarity. In order to address this limitation, we propose a novel method of obtaining emotion-enriched word representations, which projects emotionally similar words into neighboring spaces and emotionally dissimilar ones far apart. The proposed approach leverages distant supervision to automatically obtain a large training dataset of text documents and two recurrent neural network architectures for learning the emotion-enriched representations. Through extensive evaluation on two tasks, including emotion classification and emotion similarity, we demonstrate that the proposed representations outperform several competitive general-purpose and affective word representations.},
  url       = {http://www.aclweb.org/anthology/C18-1081}
}

% The system acronym PASS and the language name Dutch are brace-protected in
% the title so that sentence-casing bibliography styles do not lowercase them.
@InProceedings{vanderlee-EtAl:2018:C18-1,
  author    = {van der Lee, Chris  and  Verduijn, Bart  and  Krahmer, Emiel  and  Wubben, Sander},
  title     = {Evaluating the text quality, human likeness and tailoring component of {PASS}: A {Dutch} data-to-text system for soccer},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {962--972},
  abstract  = {We present an evaluation of PASS, a data-to-text system that generates Dutch soccer reports from match statistics which are automatically tailored towards fans of one club or the other. The evaluation in this paper consists of two studies. An intrinsic human-based evaluation of the system's output is described in the first study. In this study it was found that compared to human-written texts, computer-generated texts were rated slightly lower on style-related text components (fluency and clarity) and slightly higher in terms of the correctness of given information. Furthermore, results from the first study showed that tailoring was accurately recognized in most cases, and that participants struggled with correctly identifying whether a text was written by a human or computer. The second study investigated if tailoring affects perceived text quality, for which no results were garnered. This lack of results might be due to negative preconceptions about computer-generated texts which were found in the first study.},
  url       = {http://www.aclweb.org/anthology/C18-1082}
}

% NOTE(review): the abstract below is a single sentence; it may have been
% truncated during export. Verify against the published paper.
@inproceedings{nakanishi-kobayashi-hayashi:2018:C18-1,
  author    = {Nakanishi, Mao  and  Kobayashi, Tetsunori  and  Hayashi, Yoshihiko},
  title     = {Answerable or Not: Devising a Dataset for Extending Machine Reading Comprehension},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  pages     = {973--983},
  year      = 2018,
  month     = {August},
  publisher = {Association for Computational Linguistics},
  address   = {Santa Fe, New Mexico, USA},
  url       = {http://www.aclweb.org/anthology/C18-1083},
  abstract  = {Machine-reading comprehension (MRC) has recently attracted attention in the fields of natural language processing and machine learning.},
}

% The Polish barred l in the third author's surname is written as a BibTeX
% special character so classic 8-bit BibTeX sorts and labels it correctly.
@InProceedings{emmery-manjavacasarevalo-chrupaa:2018:C18-1,
  author    = {Emmery, Chris  and  Manjavacas Arevalo, Enrique  and  Chrupa{\l}a, Grzegorz},
  title     = {Style Obfuscation by Invariance},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {984--996},
  abstract  = {The task of obfuscating writing style using sequence models has previously been investigated under the framework of obfuscation-by-transfer, where the input text is explicitly rewritten in another style. A side effect of this framework are the frequent major alterations to the semantic content of the input. In this work, we propose obfuscation-by-invariance, and investigate to what extent models trained to be explicitly style-invariant preserve semantics. We evaluate our architectures in parallel and non-parallel settings, and compare automatic and human evaluations on the obfuscated sentences. Our experiments show that the performance of a style classifier can be reduced to chance level, while the output is evaluated to be of equal quality to models applying style-transfer. Additionally, human evaluation indicates a trade-off between the level of obfuscation and the observed quality of the output in terms of meaning preservation and grammaticality.},
  url       = {http://www.aclweb.org/anthology/C18-1084}
}

@inproceedings{ye-li-baldwin:2018:C18-1,
  author    = {Ye, Zhe  and  Li, Fang  and  Baldwin, Timothy},
  title     = {Encoding Sentiment Information into Word Vectors for Sentiment Analysis},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  pages     = {997--1007},
  year      = 2018,
  month     = {August},
  publisher = {Association for Computational Linguistics},
  address   = {Santa Fe, New Mexico, USA},
  url       = {http://www.aclweb.org/anthology/C18-1085},
  abstract  = {General-purpose pre-trained word embeddings have become a mainstay of natural language processing, and more recently, methods have been proposed to encode external knowledge into word embeddings to benefit specific downstream tasks. The goal of this paper is to encode sentiment knowledge into pre-trained word vectors to improve the performance of sentiment analysis. Our proposed method is based on a convolutional neural network (CNN) and an external sentiment lexicon. Experiments on four popular sentiment analysis datasets show that this method improves the accuracy of sentiment analysis compared to a number of benchmark methods.},
}

@inproceedings{niu-rao-carpuat:2018:C18-1,
  author    = {Niu, Xing  and  Rao, Sudha  and  Carpuat, Marine},
  title     = {Multi-Task Neural Models for Translating Between Styles Within and Across Languages},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  pages     = {1008--1021},
  year      = 2018,
  month     = {August},
  publisher = {Association for Computational Linguistics},
  address   = {Santa Fe, New Mexico, USA},
  url       = {http://www.aclweb.org/anthology/C18-1086},
  abstract  = {Generating natural language requires conveying content in an appropriate style. We explore two related tasks on generating text of varying formality: monolingual formality transfer and formality-sensitive machine translation. We propose to solve these tasks jointly using multi-task learning, and show that our models achieve state-of-the-art performance for formality transfer and are able to perform formality-sensitive translation without being explicitly trained on style-annotated translation examples.},
}

% NOTE(review): the abstract below stops mid-sentence, so it was truncated
% during export. Restore the full text from the published paper. The stray
% trailing space inside the field value has been removed.
@InProceedings{prolo:2018:C18-1,
  author    = {Prolo, Carlos A.},
  title     = {Towards a Language for Natural Language Treebank Transductions},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1022--1032},
  abstract  = {This paper describes a transduction language suitable for natural},
  url       = {http://www.aclweb.org/anthology/C18-1087}
}

% The percent sign in the abstract is escaped: an unescaped one starts a TeX
% comment and swallows the rest of the line when the field is typeset.
@InProceedings{li-ding-liu:2018:C18-1,
  author    = {Li, Zhongyang  and  Ding, Xiao  and  Liu, Ting},
  title     = {Generating Reasonable and Diversified Story Ending Using Sequence to Sequence Model with Adversarial Training},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1033--1043},
  abstract  = {Story generation is a challenging problem in artificial intelligence (AI) and has received a lot of interests in the natural language processing (NLP) community. Most previous work tried to solve this problem using Sequence to Sequence (Seq2Seq) model trained with Maximum Likelihood Estimation (MLE). However, the pure MLE training objective much limits the power of Seq2Seq model in generating high-quality storys. In this paper, we propose using adversarial training augmented Seq2Seq model to generate reasonable and diversified story endings given a story context. Our model includes a generator that defines the policy of generating a story ending, and a discriminator that labels story endings as human-generated or machine-generated. Carefully designed human and automatic evaluation metrics demonstrate that our adversarial training augmented Seq2Seq model can generate more reasonable and diversified story endings compared to purely MLE-trained Seq2Seq model. Moreover, our model achieves better performance on the task of Story Cloze Test with an accuracy of 62.6\% compared with state-of-the-art baseline methods.},
  url       = {http://www.aclweb.org/anthology/C18-1088}
}

@inproceedings{li-wan:2018:C18-1,
  author    = {Li, Liunian  and  Wan, Xiaojun},
  title     = {Point Precisely: Towards Ensuring the Precision of Data in Generated Texts Using Delayed Copy Mechanism},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  pages     = {1044--1055},
  year      = 2018,
  month     = {August},
  publisher = {Association for Computational Linguistics},
  address   = {Santa Fe, New Mexico, USA},
  url       = {http://www.aclweb.org/anthology/C18-1089},
  abstract  = {The task of data-to-text generation aims to generate descriptive texts conditioned on a number of database records, and recent neural models have shown significant progress on this task. The attention based encoder-decoder models with copy mechanism have achieved state-of-the-art results on a few data-to-text datasets. However, such models still face the problem of putting incorrect data records in the generated texts, especially on some more challenging datasets like RotoWire. In this paper, we propose a two-stage approach with a delayed copy mechanism to improve the precision of data records in the generated texts. Our approach first adopts an encoder-decoder model to generate a template text with data slots to be filled and then leverages a proposed delayed copy mechanism to fill in the slots with proper data records. Our delayed copy mechanism can take into account all the information of the input data records and the full generated template text by using double attention, position-aware attention and a pairwise ranking loss. The two models in the two stages are trained separately. Evaluation results on the RotoWire dataset verify the efficacy of our proposed approach to generate better templates and copy data records more precisely.},
}

@inproceedings{kreutz-daelemans:2018:C18-1,
  author    = {Kreutz, Tim  and  Daelemans, Walter},
  title     = {Enhancing General Sentiment Lexicons for Domain-Specific Use},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  pages     = {1056--1064},
  year      = 2018,
  month     = {August},
  publisher = {Association for Computational Linguistics},
  address   = {Santa Fe, New Mexico, USA},
  url       = {http://www.aclweb.org/anthology/C18-1090},
  abstract  = {Lexicon based methods for sentiment analysis rely on high quality polarity lexicons. In recent years, automatic methods for inducing lexicons have increased the viability of lexicon based methods for polarity classification. SentProp is a framework for inducing domain-specific polarities from word embeddings. We elaborate on SentProp by evaluating its use for enhancing DuOMan, a general-purpose lexicon, for use in the political domain. By adding only top sentiment bearing words from the vocabulary and applying small polarity shifts in the general-purpose lexicon, we increase accuracy in an in-domain classification task. The enhanced lexicon performs worse than the original lexicon in an out-domain task, showing that the words we added and the polarity shifts we applied are domain-specific and do not translate well to an out-domain setting.},
}

@inproceedings{yu-EtAl:2018:C18-1,
  author    = {Yu, Naitong  and  Zhang, Jie  and  Huang, Minlie  and  Zhu, Xiaoyan},
  title     = {An Operation Network for Abstractive Sentence Compression},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  pages     = {1065--1076},
  year      = 2018,
  month     = {August},
  publisher = {Association for Computational Linguistics},
  address   = {Santa Fe, New Mexico, USA},
  url       = {http://www.aclweb.org/anthology/C18-1091},
  abstract  = {Sentence compression condenses a sentence while preserving its most important contents. Delete-based models have the strong ability to delete undesired words, while generate-based models are able to reorder or rephrase the words, which are more coherent to human sentence compression. In this paper, we propose Operation Network, a neural network approach for abstractive sentence compression, which combines the advantages of both delete-based and generate-based sentence compression models. The central idea of Operation Network is to model the sentence compression process as an editing procedure. First, unnecessary words are deleted from the source sentence, then new words are either generated from a large vocabulary or copied directly from the source sentence. A compressed sentence can be obtained by a series of such edit operations (delete, copy and generate). Experiments show that Operation Network outperforms state-of-the-art baselines.},
}

@inproceedings{zhu-qian:2018:C18-1,
  author    = {Zhu, Peisong  and  Qian, Tieyun},
  title     = {Enhanced Aspect Level Sentiment Classification with Auxiliary Memory},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  pages     = {1077--1087},
  year      = 2018,
  month     = {August},
  publisher = {Association for Computational Linguistics},
  address   = {Santa Fe, New Mexico, USA},
  url       = {http://www.aclweb.org/anthology/C18-1092},
  abstract  = {In aspect level sentiment classification, there are two common tasks: to identify the sentiment of an aspect (category) or a term. As specific instances of aspects, terms explicitly occur in sentences. It is beneficial for models to focus on nearby context words. In contrast, as high level semantic concepts of terms, aspects usually have more generalizable representations.},
}

% NOTE(review): the abstract ended with a stray trailing space (removed here)
% and may have been truncated during export. Verify against the published
% paper.
@InProceedings{mishra-EtAl:2018:C18-1,
  author    = {Mishra, Pushkar  and  Del Tredici, Marco  and  Yannakoudakis, Helen  and  Shutova, Ekaterina},
  title     = {Author Profiling for Abuse Detection},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1088--1098},
  abstract  = {The rapid growth of social media in recent years has fed into some highly undesirable phenomena such as proliferation of hateful and offensive language on the Internet. Previous research suggests that such abusive content tends to come from users who share a set of common stereotypes and form communities around them. The current state-of-the-art approaches to abuse detection are oblivious to user and community information and rely entirely on textual (i.e., lexical and semantic) cues.},
  url       = {http://www.aclweb.org/anthology/C18-1093}
}

@inproceedings{madnani-cahill:2018:C18-1,
  author    = {Madnani, Nitin  and  Cahill, Aoife},
  title     = {Automated Scoring: Beyond Natural Language Processing},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  pages     = {1099--1109},
  year      = 2018,
  month     = {August},
  publisher = {Association for Computational Linguistics},
  address   = {Santa Fe, New Mexico, USA},
  url       = {http://www.aclweb.org/anthology/C18-1094},
  abstract  = {In this position paper, we argue that building operational automated scoring systems is a task that has disciplinary complexity above and beyond standard competitive shared tasks which usually involve applying the latest machine learning techniques to publicly available data in order to obtain the best accuracy. Automated scoring systems warrant significant cross-discipline collaboration of which natural language processing and machine learning are just two of many important components. Such systems have multiple stakeholders with different but valid perspectives that can often times be at odds with each other. Our position is that it is essential for us as NLP researchers to understand and incorporate these perspectives in our research and work towards a mutually satisfactory solution in order to build automated scoring systems that are accurate, fair, unbiased, and useful.},
}

@inproceedings{yang-EtAl:2018:C18-11,
  author    = {Yang, Min  and  Qu, Qiang  and  Shen, Ying  and  Liu, Qiao  and  Zhao, Wei  and  Zhu, Jia},
  title     = {Aspect and Sentiment Aware Abstractive Review Summarization},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  pages     = {1110--1120},
  year      = 2018,
  month     = {August},
  publisher = {Association for Computational Linguistics},
  address   = {Santa Fe, New Mexico, USA},
  url       = {http://www.aclweb.org/anthology/C18-1095},
  abstract  = {Review text has been widely studied in traditional tasks such as sentiment analysis and aspect extraction. However, to date, no work is towards the abstractive review summarization that is essential for business organizations and individual consumers to make informed decisions. This work takes the lead to study the aspect/sentiment-aware abstractive review summarization by exploring multi-factor attentions. Specifically, we propose an interactive attention mechanism to interactively learns the representations of context words, sentiment words and aspect words within the reviews, acted as an encoder. The learned sentiment and aspect representations are incorporated into the decoder to generate aspect/sentiment-aware review summaries via an attention fusion network. In addition, the abstractive summarizer is jointly trained with the text categorization task, which helps learn a category-specific text encoder, locating salient aspect information and exploring the variations of style and wording of content with respect to different text categories. The experimental results on a real-life dataset demonstrate that our model achieves impressive results compared to other strong competitors.},
}

@inproceedings{he-EtAl:2018:C18-11,
  author    = {He, Ruidan  and  Lee, Wee Sun  and  Ng, Hwee Tou  and  Dahlmeier, Daniel},
  title     = {Effective Attention Modeling for Aspect-Level Sentiment Classification},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  pages     = {1121--1131},
  year      = 2018,
  month     = {August},
  publisher = {Association for Computational Linguistics},
  address   = {Santa Fe, New Mexico, USA},
  url       = {http://www.aclweb.org/anthology/C18-1096},
  abstract  = {Aspect-level sentiment classification aims to determine the sentiment polarity of a review sentence towards an opinion target. A sentence could contain multiple sentiment-target pairs; thus the main challenge of this task is to separate different opinion contexts for different targets. To this end, \emph{attention mechanism} has played an important role in previous state-of-the-art neural models. The mechanism is able to capture the importance of each context word towards a target by modeling their semantic associations. We build upon this line of research and propose two novel approaches for improving the effectiveness of attention. First, we propose a method for target representation that better captures the semantic meaning of the opinion target. Second, we introduce an attention model that incorporates syntactic information into the attention mechanism. We experiment on attention-based LSTM (Long Short-Term Memory) models using the datasets from SemEval 2014, 2015, and 2016. The experimental results show that the conventional attention-based LSTM can be substantially improved by incorporating the two approaches.},
}

@InProceedings{moore-rayson:2018:C18-1,
  author    = {Moore, Andrew  and  Rayson, Paul},
  title     = {Bringing replication and reproduction together with generalisability in {NLP}: Three reproduction studies for Target Dependent Sentiment Analysis},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1132--1144},
  abstract  = {Lack of repeatability and generalisability are two significant threats to continuing scientific development in Natural Language Processing. Language models and learning methods are so complex that scientific conference papers no longer contain enough space for the technical depth required for replication or reproduction. Taking Target Dependent Sentiment Analysis as a case study, we show how recent work in the field has not consistently released code, or described settings for learning methods in enough detail, and lacks comparability and generalisability in train, test or validation data. To investigate generalisability and to enable state of the art comparative evaluations, we carry out the first reproduction studies of three groups of complementary methods and perform the first large-scale mass evaluation on six different English datasets. Reflecting on our experiences, we recommend that future replication or reproduction experiments should always consider a variety of datasets alongside documenting and releasing their methods and published code in order to minimise the barriers to both repeatability and generalisability. We have released our code with a model zoo on GitHub with Jupyter Notebooks to aid understanding and full documentation, and we recommend that others do the same with their papers at submission time through an anonymised GitHub account.},
  url       = {http://www.aclweb.org/anthology/C18-1097}
}

@inproceedings{hsu-chaudhary-samatova:2018:C18-1,
  author    = {Hsu, Shiou Tian and Chaudhary, Mandar and Samatova, Nagiza},
  title     = {Multilevel Heuristics for Rationale-Based Entity Relation Classification in Sentences},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  pages     = {1145--1155},
  year      = {2018},
  month     = {August},
  publisher = {Association for Computational Linguistics},
  address   = {Santa Fe, New Mexico, USA},
  url       = {http://www.aclweb.org/anthology/C18-1098},
  abstract  = {Rationale-based models provide a unique way to provide justifiable results for relation classification models by identifying rationales (key words and phrases that a person can use to justify the relation in the sentence) during the process. However, existing generative networks used to extract rationales come with a trade-off between extracting diversified rationales and achieving good classification results. In this paper, we propose a multilevel heuristic approach to regulate rationale extraction to avoid extracting monotonous rationales without compromising classification performance. In our model, rationale selection is regularized by a semi-supervised process and features from different levels: word, syntax, sentence, and corpus. We evaluate our approach on the SemEval 2010 dataset that includes 19 relation classes and the quality of extracted rationales with our manually-labeled rationales. Experiments show a significant improvement in classification performance and a 20\% gain in rationale interpretability compared to state-of-the-art approaches.},
}

@inproceedings{wang-EtAl:2018:C18-12,
  author    = {Wang, Xiaozhi and Han, Xu and Lin, Yankai and Liu, Zhiyuan and Sun, Maosong},
  title     = {Adversarial Multi-lingual Neural Relation Extraction},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  pages     = {1156--1166},
  year      = {2018},
  month     = {August},
  publisher = {Association for Computational Linguistics},
  address   = {Santa Fe, New Mexico, USA},
  url       = {http://www.aclweb.org/anthology/C18-1099},
  abstract  = {Multi-lingual relation extraction aims to find unknown relational facts from text in various languages. Existing models cannot well capture the consistency and diversity of relation patterns in different languages. To address these issues, we propose an adversarial multi-lingual neural relation extraction (AMNRE) model, which builds both consistent and individual representations for each sentence to consider the consistency and diversity among languages. Further, we adopt an adversarial training strategy to ensure those consistent sentence representations could effectively extract the language-consistent relation patterns. The experimental results on real-world datasets demonstrate that our AMNRE model significantly outperforms the state-of-the-art models. The source code of this paper can be obtained from https://github.com/thunlp/AMNRE.},
}

@InProceedings{ren-EtAl:2018:C18-1,
  author    = {Ren, Feiliang  and  Zhou, Di  and  Liu, Zhihui  and  Li, Yongcheng  and  Zhao, Rongsheng  and  Liu, Yongkang  and  Liang, Xiaobo},
  title     = {Neural Relation Classification with Text Descriptions},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1167--1177},
  abstract  = {Relation classification is an important task in natural language processing fields. State-of-the-art methods usually concentrate on building deep neural networks based classification models on the training data in which the relations of the labeled entity pairs are given. However, these methods usually suffer from the data sparsity issue greatly. On the other hand, we notice that it is very easily to obtain some concise text descriptions for almost all of the entities in a relation classification task. The text descriptions can provide helpful supplementary information for relation classification. But they are ignored by most of existing methods. In this paper, we propose DesRC, a new neural relation classification method which integrates entities' text descriptions into deep neural networks models. We design a two-level attention mechanism to select the most useful information from the ``intra-sentence'' aspect and the ``cross-sentence'' aspect. Besides, the adversarial training method is also used to further improve the classification performance. Finally, we evaluate the proposed method on the SemEval 2010 dataset. Extensive experiments show that our method achieves much better experimental results than other state-of-the-art relation classification methods.},
  url       = {http://www.aclweb.org/anthology/C18-1100}
}

@inproceedings{liao-lebanoff-liu:2018:C18-1,
  author    = {Liao, Kexin and Lebanoff, Logan and Liu, Fei},
  title     = {Abstract Meaning Representation for Multi-Document Summarization},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  pages     = {1178--1190},
  year      = {2018},
  month     = {August},
  publisher = {Association for Computational Linguistics},
  address   = {Santa Fe, New Mexico, USA},
  url       = {http://www.aclweb.org/anthology/C18-1101},
  abstract  = {Generating an abstract from a collection of documents is a desirable capability for many real-world applications. However, abstractive approaches to multi-document summarization have not been thoroughly investigated. This paper studies the feasibility of using Abstract Meaning Representation (AMR), a semantic representation of natural language grounded in linguistic theory, as a form of content representation. Our approach condenses source documents to a set of summary graphs following the AMR formalism. The summary graphs are then transformed to a set of summary sentences in a surface realization step. The framework is fully data-driven and flexible. Each component can be optimized independently using small-scale, in-domain training data. We perform experiments on benchmark summarization datasets and report promising results. We also describe opportunities and challenges for advancing this line of research.},
}

@inproceedings{nayeem-fuad-chali:2018:C18-1,
  author    = {Nayeem, Mir Tafseer and Fuad, Tanvir Ahmed and Chali, Yllias},
  title     = {Abstractive Unsupervised Multi-Document Summarization using Paraphrastic Sentence Fusion},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  pages     = {1191--1204},
  year      = {2018},
  month     = {August},
  publisher = {Association for Computational Linguistics},
  address   = {Santa Fe, New Mexico, USA},
  url       = {http://www.aclweb.org/anthology/C18-1102},
  abstract  = {In this work, we aim at developing an unsupervised abstractive summarization system in the multi-document setting. We design a paraphrastic sentence fusion model which jointly performs sentence fusion and paraphrasing using skip-gram word embedding model at the sentence level. Our model improves the information coverage and at the same time abstractiveness of the generated sentences. We conduct our experiments on the human-generated multi-sentence compression datasets and evaluate our system on several newly proposed Machine Translation (MT) evaluation metrics. Furthermore, we apply our sentence level model to implement an abstractive multi-document summarization system where documents usually contain a related set of sentences. We also propose an optimal solution for the classical summary length limit problem which was not addressed in the past research. For the document level summary, we conduct experiments on the datasets of two different domains (e.g., news article and user reviews) which are well suited for multi-document abstractive summarization. Our experiments demonstrate that the methods bring significant improvements over the state-of-the-art methods.},
}

@InProceedings{tran-nguyen:2018:C18-1,
  author    = {Tran, Van-Khanh  and  Nguyen, Le-Minh},
  title     = {Adversarial Domain Adaptation for Variational Neural Language Generation in Dialogue Systems},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1205--1217},
  abstract  = {Domain Adaptation arises when we aim at learning from source domain a model that can perform acceptably well on a different target domain. It is especially crucial for Natural Language Generation (NLG) in Spoken Dialogue Systems when there are sufficient annotated data in the source domain, but there is a limited labeled data in the target domain. How to effectively utilize as much of existing abilities from source domains is a crucial issue in domain adaptation.},
  url       = {http://www.aclweb.org/anthology/C18-1103}
}

@InProceedings{shekhar-EtAl:2018:C18-1,
  author    = {Shekhar, Ravi  and  Baumg{\"a}rtner, Tim  and  Venkatesh, Aashish  and  Bruni, Elia  and  Bernardi, Raffaella  and  Fern{\'a}ndez, Raquel},
  title     = {Ask No More: Deciding when to guess in referential visual dialogue},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1218--1233},
  abstract  = {Our goal is to explore how the abilities brought in by a dialogue manager can be included in end-to-end visually grounded conversational agents. We make initial steps towards this general goal by augmenting a task-oriented visual dialogue model with a decision-making component that decides whether to ask a follow-up question to identify a target referent in an image, or to stop the conversation to make a guess. Our analyses show that adding a decision making component produces dialogues that are less repetitive and that include fewer unnecessary questions, thus potentially leading to more efficient and less unnatural interactions.},
  url       = {http://www.aclweb.org/anthology/C18-1104}
}

@inproceedings{hou-EtAl:2018:C18-1,
  author    = {Hou, Yutai and Liu, Yijia and Che, Wanxiang and Liu, Ting},
  title     = {Sequence-to-Sequence Data Augmentation for Dialogue Language Understanding},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  pages     = {1234--1245},
  year      = {2018},
  month     = {August},
  publisher = {Association for Computational Linguistics},
  address   = {Santa Fe, New Mexico, USA},
  url       = {http://www.aclweb.org/anthology/C18-1105},
  abstract  = {In this paper, we study the problem of data augmentation for language understanding in task-oriented dialogue system. In contrast to previous work which augments an utterance without considering its relation with other utterances, we propose a sequence-to-sequence generation based data augmentation framework that leverages one utterance's same semantic alternatives in the training data. A novel diversity rank is incorporated into the utterance representation to make the model produce diverse utterances and these diversely augmented utterances help to improve the language understanding module. Experimental results on the Airline Travel Information System dataset and a newly created semantic frame annotation on Stanford Multi-turn, Multi-domain Dialogue Dataset show that our framework achieves significant improvements of 6.38 and 10.04 F-scores respectively when only a training set of hundreds utterances is represented. Case studies also confirm that our method generates diverse utterances.},
}

@InProceedings{kumar-agarwal-joshi:2018:C18-1,
  author    = {Kumar, Harshit  and  Agarwal, Arvind  and  Joshi, Sachindra},
  title     = {Dialogue-act-driven Conversation Model: An Experimental Study},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1246--1256},
  abstract  = {The utility of additional semantic information for the task of next utterance selection in an automated dialogue system is the focus of study in this paper. In particular, we show that additional information available in the form of dialogue acts --when used along with context given in the form of dialogue history-- improves the performance irrespective of the underlying model being generative or discriminative. In order to show the model agnostic behavior of dialogue acts, we experiment with several well-known models such as sequence-to-sequence encoder-decoder model, hierarchical encoder-decoder model, and Siamese-based models with and without hierarchy; and show that in all models, incorporating dialogue acts improves the performance by a significant margin. We, furthermore, propose a novel way of encoding dialogue act information, and use it along with hierarchical encoder to build a model that can use the sequential dialogue act information in a natural way. Our proposed model achieves an MRR of about 84.8\% for the task of next utterance selection on a newly introduced Daily Dialogue dataset, and outperform the baseline models. We also provide a detailed analysis of results including key insights that explain the improvement in MRR because of dialog act information.},
  url       = {http://www.aclweb.org/anthology/C18-1106}
}

@InProceedings{chen-EtAl:2018:C18-12,
  author    = {Chen, Lu  and  Tan, Bowen  and  Long, Sishan  and  Yu, Kai},
  title     = {Structured Dialogue Policy with Graph Neural Networks},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1257--1268},
  abstract  = {Recently, deep reinforcement learning (DRL) has been used for dialogue policy optimization. However, many DRL-based policies are not sample-efficient.},
  url       = {http://www.aclweb.org/anthology/C18-1107}
}

@InProceedings{liang-EtAl:2018:C18-1,
  author    = {Liang, Hongru  and  Wang, Haozheng  and  Wang, Jun  and  You, Shaodi  and  Sun, Zhe  and  Wei, Jin-Mao  and  Yang, Zhenglu},
  title     = {{JTAV}: Jointly Learning Social Media Content Representation by Fusing Textual, Acoustic, and Visual Features},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1269--1280},
  abstract  = {Learning social media content is the basis of many real-world applications,},
  url       = {http://www.aclweb.org/anthology/C18-1108}
}

@InProceedings{zou-EtAl:2018:C18-13,
  author    = {Zou, Meng  and  Li, Xihan  and  Liu, Haokun  and  Deng, Zhihong},
  title     = {{MEMD}: A Diversity-Promoting Learning Framework for Short-Text Conversation},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1281--1291},
  abstract  = {Neural encoder-decoder models have been widely applied to conversational response generation, which is a research hot spot in recent years. However, conventional neural encoder-decoder models tend to generate commonplace responses like ``I don't know'' regardless of what the input is. In this paper, we analyze this problem from a new perspective: latent vectors. Based on it, we propose an easy-to-extend learning framework named MEMD (Multi-Encoder to Multi-Decoder), in which an auxiliary encoder and an auxiliary decoder are introduced to provide necessary training guidance without resorting to extra data or complicating network's inner structure. Experimental results demonstrate that our method effectively improve the quality of generated responses according to automatic metrics and human evaluations, yielding more diverse and smooth replies.},
  url       = {http://www.aclweb.org/anthology/C18-1109}
}

@inproceedings{zhang-EtAl:2018:C18-12,
  author    = {Zhang, Wen and Hu, Jiawei and Feng, Yang and Liu, Qun},
  title     = {Refining Source Representations with Relation Networks for Neural Machine Translation},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  pages     = {1292--1303},
  year      = {2018},
  month     = {August},
  publisher = {Association for Computational Linguistics},
  address   = {Santa Fe, New Mexico, USA},
  url       = {http://www.aclweb.org/anthology/C18-1110},
  abstract  = {Although neural machine translation with the encoder-decoder framework has achieved great success recently, it still suffers drawbacks of forgetting distant information, which is an inherent disadvantage of recurrent neural network structure, and disregarding relationship between source words during encoding step. Whereas in practice, the former information and relationship are often useful in current step. We target on solving these problems and thus introduce relation networks to learn better representations of the source. The relation networks are able to facilitate memorization capability of recurrent neural network via associating source words with each other, this would also help retain their relationships. Then the source representations and all the relations are fed into the attention component together while decoding, with the main encoder-decoder framework unchanged. Experiments on several datasets show that our method can improve the translation performance significantly over the conventional encoder-decoder model and even outperform the approach involving supervised syntactic knowledge.},
}

@inproceedings{chu-wang:2018:C18-1,
  author    = {Chu, Chenhui and Wang, Rui},
  title     = {A Survey of Domain Adaptation for Neural Machine Translation},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  pages     = {1304--1319},
  year      = {2018},
  month     = {August},
  publisher = {Association for Computational Linguistics},
  address   = {Santa Fe, New Mexico, USA},
  url       = {http://www.aclweb.org/anthology/C18-1111},
  abstract  = {Neural machine translation (NMT) is a deep learning based approach for machine translation, which yields the state-of-the-art translation performance in scenarios where large-scale parallel corpora are available. Although the high-quality and domain-specific translation is crucial in the real world, domain-specific corpora are usually scarce or nonexistent, and thus vanilla NMT performs poorly in such scenarios. Domain adaptation that leverages both out-of-domain parallel corpora as well as monolingual corpora for in-domain translation, is very important for domain-specific translation. In this paper, we give a comprehensive survey of the state-of-the-art domain adaptation techniques for NMT.},
}

@inproceedings{tang-EtAl:2018:C18-1,
  author    = {Tang, Gongbo and Cap, Fabienne and Pettersson, Eva and Nivre, Joakim},
  title     = {An Evaluation of Neural Machine Translation Models on Historical Spelling Normalization},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  pages     = {1320--1331},
  year      = {2018},
  month     = {August},
  publisher = {Association for Computational Linguistics},
  address   = {Santa Fe, New Mexico, USA},
  url       = {http://www.aclweb.org/anthology/C18-1112},
  abstract  = {In this paper, we apply different NMT models to the problem of historical spelling normalization for five languages: English, German, Hungarian, Icelandic, and Swedish. The NMT models are at different levels, have different attention mechanisms, and different neural network architectures. Our results show that NMT models are much better than SMT models in terms of character error rate. The vanilla RNNs are competitive to GRUs/LSTMs in historical spelling normalization. Transformer models perform better only when provided with more training data. We also find that subword-level models with a small subword vocabulary are better than character-level models. In addition, we propose a hybrid method which further improves the performance of historical spelling normalization.},
}

@InProceedings{salameh-bouamor:2018:C18-1,
  author    = {Salameh, Mohammad  and  Bouamor, Houda},
  title     = {Fine-Grained {Arabic} Dialect Identification},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1332--1344},
  abstract  = {Previous work on the problem of Arabic Dialect Identification typically targeted coarse-grained five dialect classes plus Standard Arabic (6-way classification). This paper presents the first results on a fine-grained dialect classification task covering 25 specific cities from across the Arab World, in addition to Standard Arabic -- a very challenging task. We build several classification systems and explore a large space of features. Our results show that we can identify the exact city of a speaker at an accuracy of 67.9\% for sentences with an average length of 7 words (a 9\% relative error reduction over the state-of-the-art technique for Arabic dialect identification) and reach more than 90\% when we consider 16 words. We also report on additional insights from a data analysis of similarity and difference across Arabic dialects.},
  url       = {http://www.aclweb.org/anthology/C18-1113}
}

@InProceedings{kim-klinger:2018:C18-1,
  author    = {Kim, Evgeny  and  Klinger, Roman},
  title     = {Who Feels What and Why? {Annotation} of a Literature Corpus with Semantic Roles of Emotions},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1345--1359},
  abstract  = {Most approaches to emotion analysis in fictional texts focus on detecting the emotion expressed in text. We argue that this is a simplification which leads to an overgeneralized interpretation of the results, as it does not take into account who experiences an emotion and why. Emotions play a crucial role in the interaction between characters and the events they are involved in. Until today, no specific corpora that capture such an interaction were available for literature. We aim at filling this gap and present a publicly available corpus based on Project Gutenberg, REMAN (Relational EMotion ANnotation), manually annotated for spans which correspond to emotion trigger phrases and entities/events in the roles of experiencers, targets, and causes of the emotion. We provide baseline results for the automatic prediction of these relational structures and show that emotion lexicons are not able to encompass the high variability of emotion expressions and demonstrate that statistical models benefit from joint modeling of emotions with its roles in all subtasks. The corpus that we provide enables future research on the recognition of emotions and associated entities in text. It supports qualitative literary studies and digital humanities. The corpus is available at http://www.ims.uni-stuttgart.de/data/reman .},
  url       = {http://www.aclweb.org/anthology/C18-1114}
}

@inproceedings{ribeiro-EtAl:2018:C18-1,
  author    = {Ribeiro, Joana and Narayan, Shashi and Cohen, Shay B. and Carreras, Xavier},
  title     = {Local String Transduction as Sequence Labeling},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  pages     = {1360--1371},
  year      = {2018},
  month     = {August},
  publisher = {Association for Computational Linguistics},
  address   = {Santa Fe, New Mexico, USA},
  url       = {http://www.aclweb.org/anthology/C18-1115},
  abstract  = {We show that the general problem of string transduction can be reduced to the problem of sequence labeling. While character deletion and insertions are allowed in string transduction, they do not exist in sequence labeling. We show how to overcome this difference. Our approach can be used with any sequence labeling algorithm and it works best for problems in which string transduction imposes a strong notion of locality (no long range dependencies). We experiment with spelling correction for social media, OCR correction, and morphological inflection, and we see that it behaves better than seq2seq models and yields state-of-the-art results in several cases.},
}

@inproceedings{aghaebrahimian:2018:C18-1,
  author    = {Aghaebrahimian, Ahmad},
  title     = {Deep Neural Networks at the Service of Multilingual Parallel Sentence Extraction},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  pages     = {1372--1383},
  year      = {2018},
  month     = {August},
  publisher = {Association for Computational Linguistics},
  address   = {Santa Fe, New Mexico, USA},
  url       = {http://www.aclweb.org/anthology/C18-1116},
  abstract  = {Wikipedia provides an invaluable source of parallel multilingual data, which are in high demand for various sorts of linguistic inquiry, including both theoretical and practical studies. We introduce a novel end-to-end neural model for large-scale parallel data harvesting from Wikipedia. Our model is language-independent, robust, and highly scalable. We use our system for collecting parallel German-English, French-English and Persian-English sentences. Human evaluations at the end show the strong performance of this model in collecting high-quality parallel data. We also propose a statistical framework which extends the results of our human evaluation to other language pairs. Our model also obtained a state-of-the-art result on the German-English dataset of BUCC 2017 shared task on parallel sentence extraction from comparable corpora.},
}

@InProceedings{kutuzov-EtAl:2018:C18-1,
  author    = {Kutuzov, Andrey  and  {\O}vrelid, Lilja  and  Szymanski, Terrence  and  Velldal, Erik},
  title     = {Diachronic word embeddings and semantic shifts: a survey},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1384--1397},
  abstract  = {Recent years have witnessed a surge of publications aimed at tracing temporal changes in lexical semantics using distributional methods, particularly prediction-based word embedding models.},
  url       = {http://www.aclweb.org/anthology/C18-1117}
}

@inproceedings{he-EtAl:2018:C18-12,
  author    = {He, Ruifang and Zhang, Xuefei and Jin, Di and Wang, Longbiao and Dang, Jianwu and Li, Xiangang},
  title     = {Interaction-Aware Topic Model for Microblog Conversations through Network Embedding and User Attention},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  pages     = {1398--1409},
  year      = {2018},
  month     = {August},
  publisher = {Association for Computational Linguistics},
  address   = {Santa Fe, New Mexico, USA},
  url       = {http://www.aclweb.org/anthology/C18-1118},
  abstract  = {Traditional topic models are insufficient for topic extraction in social media. The existing methods only consider text information or simultaneously model the posts and the static characteristics of social media. They ignore that one discusses diverse topics when dynamically interacting with different people. Moreover, people who talk about the same topic have different effects on the topic. In this paper, we propose an Interaction-Aware Topic Model (IATM) for microblog conversations by integrating network embedding and user attention. A conversation network linking users based on reposting and replying relationship is constructed to mine the dynamic user behaviours. We model dynamic interactions and user attention so as to learn interaction-aware edge embeddings with social context. Then they are incorporated into neural variational inference for generating the more consistent topics. The experiments on three real-world datasets show that our proposed model is effective.},
}

@inproceedings{wang-EtAl:2018:C18-13,
  author    = {Wang, Jingjing and Li, Shoushan and Jiang, Mingqi and Wu, Hanqian and Zhou, Guodong},
  title     = {Cross-media User Profiling with Joint Textual and Social User Embedding},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  pages     = {1410--1420},
  year      = {2018},
  month     = {August},
  publisher = {Association for Computational Linguistics},
  address   = {Santa Fe, New Mexico, USA},
  url       = {http://www.aclweb.org/anthology/C18-1119},
  abstract  = {In realistic scenarios, a user profiling model (e.g., gender classification or age regression) learned from one social media might perform rather poorly when tested on another social media due to the different data distributions in the two media. In this paper, we address cross-media user profiling by bridging the knowledge between the source and target media with a uniform user embedding},
}

@inproceedings{zaremoodi-haffari:2018:C18-1,
    author = {Zaremoodi, Poorya  and  Haffari, Gholamreza},
    title = {Incorporating Syntactic Uncertainty in Neural Machine Translation with a Forest-to-Sequence Model},
    booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
    pages = {1421--1429},
    year = {2018},
    month = {August},
    address = {Santa Fe, New Mexico, USA},
    publisher = {Association for Computational Linguistics},
    url = {http://www.aclweb.org/anthology/C18-1120},
    abstract = {Incorporating syntactic information in Neural Machine Translation (NMT) can lead to better reorderings, particularly useful when the language pairs are syntactically highly divergent or when the training bitext is not large. Previous work on using syntactic information, provided by top-1 parse trees generated by (inevitably error-prone) parsers, has been promising. In this paper, we propose a forest-to-sequence NMT model to make use of exponentially many parse trees of the source sentence to compensate for the parser errors. Our method represents the collection of parse trees as a packed forest, and learns a neural transducer to translate from the input forest to the target sentence. Experiments on English to German, Chinese and Farsi translation tasks show the superiority of our approach over the sequence-to-sequence and tree-to-sequence neural translation models.}
}

@inproceedings{li-EtAl:2018:C18-11,
    author = {Li, Haoran  and  Zhu, Junnan  and  Zhang, Jiajun  and  Zong, Chengqing},
    title = {Ensure the Correctness of the Summary: Incorporate Entailment Knowledge into Abstractive Sentence Summarization},
    booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
    pages = {1430--1441},
    year = {2018},
    month = {August},
    address = {Santa Fe, New Mexico, USA},
    publisher = {Association for Computational Linguistics},
    url = {http://www.aclweb.org/anthology/C18-1121},
    abstract = {In this paper, we investigate the sentence summarization task that produces a summary from a source sentence. Neural sequence-to-sequence models have gained considerable success for this task, while most existing approaches only focus on improving the informativeness of the summary, which ignore the correctness, i.e., the summary should not contain unrelated information with respect to the source sentence. We argue that correctness is an essential requirement for summarization systems. Considering a correct summary is semantically entailed by the source sentence, we incorporate entailment knowledge into abstractive summarization models. We propose an entailment-aware encoder under multi-task framework (i.e., summarization generation and entailment recognition) and an entailment-aware decoder by entailment Reward Augmented Maximum Likelihood (RAML) training. Experiment results demonstrate that our models significantly outperform baselines from the aspects of informativeness and correctness.}
}

@InProceedings{grgoire-langlais:2018:C18-1,
  author    = {Gr{\'e}goire, Francis  and  Langlais, Philippe},
  title     = {Extracting Parallel Sentences with Bidirectional Recurrent Neural Networks to Improve Machine Translation},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1442--1453},
  abstract  = {Parallel sentence extraction is a task addressing the data sparsity problem found in multilingual natural language processing applications. We propose a bidirectional recurrent neural network based approach to extract parallel sentences from collections of multilingual texts. Our experiments with noisy parallel corpora show that we can achieve promising results against a competitive baseline by removing the need of specific feature engineering or additional external resources. To justify the utility of our approach, we extract sentence pairs from Wikipedia articles to train machine translation systems and show significant improvements in translation performance.},
  url       = {http://www.aclweb.org/anthology/C18-1122}
}

@InProceedings{zhang-ng-sproat:2018:C18-1,
  author    = {Zhang, Hao  and  Ng, Axel  and  Sproat, Richard},
  title     = {Fast and Accurate Reordering with {ITG} Transition {RNN}},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1454--1463},
  abstract  = {Attention-based sequence-to-sequence neural network models learn to jointly align and translate. The quadratic-time attention mechanism is powerful as it is capable of handling arbitrary long-distance reordering, but computationally expensive. In this paper, towards making neural translation both accurate and efficient, we follow the traditional pre-reordering approach to decouple reordering from translation. We add a reordering RNN that shares the input encoder with the decoder. The RNNs are trained jointly with a multi-task loss function and applied sequentially at inference time. The task of the reordering model is to predict the permutation of the input words following the target language word order. After reordering, the attention in the decoder becomes more peaked and monotonic. For reordering, we adopt the Inversion Transduction Grammars (ITG) and propose a transition system to parse input to trees for reordering. We harness the ITG transition system with RNN. With the modeling power of RNN, we achieve superior reordering accuracy without any feature engineering. In experiments, we apply the model to the task of text normalization. Compared to a strong baseline of attention-based RNN, our ITG RNN re-ordering model can reach the same reordering accuracy with only 1/10 of the training data and is 2.5x faster in decoding.},
  url       = {http://www.aclweb.org/anthology/C18-1123}
}

@inproceedings{wang-EtAl:2018:C18-14,
    author = {Wang, Mingxuan  and  Xie, Jun  and  Tan, Zhixing  and  Su, Jinsong  and  Xiong, Deyi  and  Bian, Chao},
    title = {Neural Machine Translation with Decoding History Enhanced Attention},
    booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
    pages = {1464--1473},
    year = {2018},
    month = {August},
    address = {Santa Fe, New Mexico, USA},
    publisher = {Association for Computational Linguistics},
    url = {http://www.aclweb.org/anthology/C18-1124},
    abstract = {Neural machine translation with source-side attention have achieved remarkable performance.}
}

@InProceedings{granet-EtAl:2018:C18-1,
  author    = {Granet, Adeline  and  Morin, Emmanuel  and  Mouch{\`e}re, Harold  and  Quiniou, Solen  and  Viard-Gaudin, Christian},
  title     = {Transfer Learning for a Letter-Ngrams to Word Decoder in the Context of Historical Handwriting Recognition with Scarce Resources},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1474--1484},
  abstract  = {Lack of data can be an issue when beginning a new study on historical handwritten documents.},
  url       = {http://www.aclweb.org/anthology/C18-1125}
}

@InProceedings{cohan-EtAl:2018:C18-1,
  author    = {Cohan, Arman  and  Desmet, Bart  and  Yates, Andrew  and  Soldaini, Luca  and  MacAvaney, Sean  and  Goharian, Nazli},
  title     = {{SMHD}: a Large-Scale Resource for Exploring Online Language Usage for Multiple Mental Health Conditions},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1485--1497},
  abstract  = {Mental health is a significant and growing public health concern. As language usage can be leveraged to obtain crucial insights into mental health conditions, there is a need for large-scale, labeled, mental health-related datasets of users who have been diagnosed with one or more of such conditions. In this paper, we investigate the creation of high-precision patterns to identify self-reported diagnoses of nine different mental health conditions, and obtain high-quality labeled data without the need for manual labelling. We introduce the SMHD (Self-reported Mental Health Diagnoses) dataset and make it available. SMHD is a novel large dataset of social media posts from users with one or multiple mental health conditions along with matched control users. We examine distinctions in users' language, as measured by linguistic and psychological variables. We further explore text classification methods to identify individuals with mental conditions through their language.},
  url       = {http://www.aclweb.org/anthology/C18-1126}
}

@InProceedings{potthast-EtAl:2018:C18-1,
  author    = {Potthast, Martin  and  Gollub, Tim  and  Komlossy, Kristof  and  Schuster, Sebastian  and  Wiegmann, Matti  and  Garces Fernandez, Erika Patricia  and  Hagen, Matthias  and  Stein, Benno},
  title     = {Crowdsourcing a Large Corpus of Clickbait on {Twitter}},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1498--1507},
  abstract  = {Clickbait has become a nuisance on social media. To address the urging task of clickbait detection, we constructed a new corpus of 38,517 annotated Twitter tweets, the Webis Clickbait Corpus 2017. To avoid biases in terms of publisher and topic, tweets were sampled from the top 27 most retweeted news publishers, covering a period of 150 days. Each tweet has been annotated on 4-point scale by five annotators recruited at Amazon's Mechanical Turk. The corpus has been employed to evaluate 12 clickbait detectors submitted to the Clickbait Challenge 2017.},
  url       = {http://www.aclweb.org/anthology/C18-1127}
}

@inproceedings{otani-EtAl:2018:C18-1,
    author = {Otani, Naoki  and  Kiyomaru, Hirokazu  and  Kawahara, Daisuke  and  Kurohashi, Sadao},
    title = {Cross-lingual Knowledge Projection Using Machine Translation and Target-side Knowledge Base Completion},
    booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
    pages = {1508--1520},
    year = {2018},
    month = {August},
    address = {Santa Fe, New Mexico, USA},
    publisher = {Association for Computational Linguistics},
    url = {http://www.aclweb.org/anthology/C18-1128},
    abstract = {Considerable effort has been devoted to building commonsense knowledge bases. However, they are not available in many languages because the construction of KBs is expensive. To bridge the gap between languages, this paper addresses the problem of projecting the knowledge in English, a resource-rich language, into other languages, where the main challenge lies in projection ambiguity. This ambiguity is partially solved by machine translation and target-side knowledge base completion, but neither of them is adequately reliable by itself. We show their combination can project English commonsense knowledge into Japanese and Chinese with high precision. Our method also achieves a top-10 accuracy of 90\% on the crowdsourced English--Japanese benchmark. Furthermore, we use our method to obtain 18,747 facts of accurate Japanese commonsense within a very short period.}
}

@inproceedings{cuong-xu:2018:C18-1,
    author = {Cuong, Hoang  and  Xu, Jia},
    title = {Assessing Quality Estimation Models for Sentence-Level Prediction},
    booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
    pages = {1521--1533},
    year = {2018},
    month = {August},
    address = {Santa Fe, New Mexico, USA},
    publisher = {Association for Computational Linguistics},
    url = {http://www.aclweb.org/anthology/C18-1129},
    abstract = {This paper provides an evaluation of a wide range of advanced sentence-level Quality Estimation models, including Support Vector Regression, Ride Regression, Neural Networks, Gaussian Processes, Bayesian Neural Networks, Deep Kernel Learning and Deep Gaussian Processes. Beside the accurateness, our main concerns are also the robustness of Quality Estimation models. Our work raises the difficulty in building strong models. Specifically, we show that Quality Estimation models often behave differently in Quality Estimation feature space, depending on whether the scale of feature space is small, medium or large. We also show that Quality Estimation models often behave differently in evaluation settings, depending on whether test data come from the same domain as the training data or not. Our work suggests several strong candidates to use in different circumstances.}
}

@InProceedings{preoiucpietro-ungar:2018:C18-1,
  author    = {Preo{\c{t}}iuc-Pietro, Daniel  and  Ungar, Lyle},
  title     = {User-Level Race and Ethnicity Predictors from {Twitter} Text},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1534--1545},
  abstract  = {User demographic inference from social media text has the potential to improve a range of downstream applications, including real-time passive polling or quantifying demographic bias. This study focuses on developing models for user-level race and ethnicity prediction. We introduce a data set of users who self-report their race/ethnicity through a survey, in contrast to previous approaches that use distantly supervised data or perceived labels. We develop predictive models from text which accurately predict the membership of a user to the four largest racial and ethnic groups with up to .884 AUC and make these available to the research community.},
  url       = {http://www.aclweb.org/anthology/C18-1130}
}

@inproceedings{karimi-EtAl:2018:C18-1,
    author = {Karimi, Hamid  and  Roy, Proteek  and  Saba-Sadiya, Sari  and  Tang, Jiliang},
    title = {Multi-Source Multi-Class Fake News Detection},
    booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
    pages = {1546--1557},
    year = {2018},
    month = {August},
    address = {Santa Fe, New Mexico, USA},
    publisher = {Association for Computational Linguistics},
    url = {http://www.aclweb.org/anthology/C18-1131},
    abstract = {Fake news spreading through media outlets poses a real threat to the trustworthiness of information and detecting fake news has attracted increasing attention in recent years. Fake news is typically written intentionally to mislead readers, which determines that fake news detection merely based on news content is tremendously challenging. Meanwhile, fake news could contain true evidence to mock true news and presents different degrees of fakeness, which further exacerbates the detection difficulty. On the other hand, the spread of fake news produces various types of data from different perspectives. These multiple sources provide rich contextual information about fake news and offer unprecedented opportunities for advanced fake news detection. In this paper, we study fake news detection with different degrees of fakeness by integrating multiple sources. In particular, we introduce approaches to combine information from multiple sources and to discriminate between different degrees of fakeness, and propose a Multi-source Multi-class Fake news Detection framework MMFD, which combines automated feature extraction, multi-source fusion and automated degrees of fakeness detection into a coherent and interpretable model. Experimental results on the real-world data demonstrate the effectiveness of the proposed framework and extensive experiments are further conducted to understand the working of the proposed framework.}
}

@InProceedings{dodinh-eger-gurevych:2018:C18-1,
  author    = {Do Dinh, Erik-L{\^a}n  and  Eger, Steffen  and  Gurevych, Iryna},
  title     = {Killing Four Birds with Two Stones: Multi-Task Learning for Non-Literal Language Detection},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1558--1569},
  abstract  = {Non-literal language phenomena such as idioms or metaphors are commonly studied in isolation from each other in NLP. However, often similar definitions and features are being used for different phenomena, challenging the distinction. Instead, we propose to view the detection problem as a generalized non-literal language classification problem. In this paper we investigate multi-task learning for related non-literal language phenomena. We show that in contrast to simply joining the data of multiple tasks, multi-task learning consistently improves upon four metaphor and idiom detection tasks in two languages, English and German. Comparing two state-of-the-art multi-task learning architectures, we also investigate when soft parameter sharing and learned information flow can be beneficial for our related tasks. We make our adapted code publicly available.},
  url       = {http://www.aclweb.org/anthology/C18-1132}
}

@inproceedings{choudhary-EtAl:2018:C18-1,
    author = {Choudhary, Nurendra  and  Singh, Rajat  and  Anvesh Rao, Vijjini  and  Shrivastava, Manish},
    title = {Twitter corpus of Resource-Scarce Languages for Sentiment Analysis and Multilingual Emoji Prediction},
    booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
    pages = {1570--1577},
    year = {2018},
    month = {August},
    address = {Santa Fe, New Mexico, USA},
    publisher = {Association for Computational Linguistics},
    url = {http://www.aclweb.org/anthology/C18-1133},
    abstract = {In this paper, we leverage social media platforms such as twitter for developing corpus across multiple languages. The corpus creation methodology is applicable for resource-scarce languages provided the speakers of that particular language are active users on social media platforms. We present an approach to extract social media microblogs such as tweets (Twitter). In this paper, we create corpus for multilingual sentiment analysis and emoji prediction in Hindi, Bengali and Telugu. Further, we perform and analyze multiple NLP tasks utilizing the corpus to get interesting observations.}
}

@InProceedings{rama-wichmann:2018:C18-1,
  author    = {Rama, Taraka  and  Wichmann, S{\o}ren},
  title     = {Towards identifying the optimal datasize for lexically-based {Bayesian} inference of linguistic phylogenies},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1578--1590},
  abstract  = {Bayesian linguistic phylogenies are standardly based on cognate matrices for words referring to a fix set of meanings---typically around 100-200. To this day there has not been any empirical investigation into which datasize is optimal. Here we determine, across a set of language families, the optimal number of meanings required for the best performance in Bayesian phylogenetic inference. We rank meanings by stability, infer phylogenetic trees using first the most stable meaning, then the two most stable meanings, and so on, computing the quartet distance of the resulting tree to the tree proposed by language family experts at each step of datasize increase. When a gold standard tree is not available we propose to instead compute the quartet distance between the tree based on the n-most stable meaning and the one based on the n + 1-most stable meanings, increasing n from 1 to N $-$ 1, where N is the total number of meanings. The assumption here is that the value of n for which the quartet distance begins to stabilize is also the value at which the quality of the tree ceases to improve. We show that this assumption is borne out. The results of the two methods vary across families, and the optimal number of meanings appears to correlate with the number of languages under consideration.},
  url       = {http://www.aclweb.org/anthology/C18-1134}
}

@InProceedings{deltredici-fernndez:2018:C18-1,
  author    = {Del Tredici, Marco  and  Fern{\'a}ndez, Raquel},
  title     = {The Road to Success: Assessing the Fate of Linguistic Innovations in Online Communities},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1591--1603},
  abstract  = {We investigate the birth and diffusion of lexical innovations in a large dataset of online social communities. We build on sociolinguistic theories and focus on the relation between the spread of a novel term and the social role of the individuals who use it, uncovering characteristics of innovators and adopters. Finally, we perform a prediction task that allows us to anticipate whether an innovation will successfully spread within a community.},
  url       = {http://www.aclweb.org/anthology/C18-1135}
}

@InProceedings{ciobanu-dinu:2018:C18-1,
  author    = {Ciobanu, Alina Maria  and  Dinu, Liviu P.},
  title     = {Ab Initio: Automatic {Latin} Proto-word Reconstruction},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1604--1614},
  abstract  = {Proto-word reconstruction is central to the study of language evolution. It consists of recreating the words in an ancient language from its modern daughter languages. In this paper we investigate automatic word form reconstruction for Latin proto-words. Having modern word forms in multiple Romance languages (French, Italian, Spanish, Portuguese and Romanian), we infer the form of their common Latin ancestors. Our approach relies on the regularities that occurred when the Latin words entered the modern languages. We leverage information from all modern languages, building an ensemble system for proto-word reconstruction. We use conditional random fields for sequence labeling, but we conduct preliminary experiments with recurrent neural networks as well. We apply our method on multiple datasets, showing that our method improves on previous results, having also the advantage of requiring less input data, which is essential in historical linguistics, where resources are generally scarce.},
  url       = {http://www.aclweb.org/anthology/C18-1136}
}

@inproceedings{silfverberg-liu-hulden:2018:C18-1,
    author = {Silfverberg, Miikka  and  Liu, Ling  and  Hulden, Mans},
    title = {A Computational Model for the Linguistic Notion of Morphological Paradigm},
    booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
    pages = {1615--1626},
    year = {2018},
    month = {August},
    address = {Santa Fe, New Mexico, USA},
    publisher = {Association for Computational Linguistics},
    url = {http://www.aclweb.org/anthology/C18-1137},
    abstract = {In supervised learning of morphological patterns, the strategy of generalizing inflectional tables into more abstract paradigms through alignment of the longest common subsequence found in an inflection table has been proposed as an efficient method to deduce the inflectional behavior of unseen word forms. In this paper, we extend this notion of morphological `paradigm' from earlier work and provide a formalization that more accurately matches linguist intuitions about what an inflectional paradigm is. Additionally, we propose and evaluate a mechanism for learning full human-readable paradigm specifications from incomplete data---a scenario when we only have access to a few inflected forms for each lexeme, and want to reconstruct the missing inflections as well as generalize and group the witnessed patterns into a model of more abstract paradigmatic behavior of lexemes.}
}

@inproceedings{bouraoui-jameel-schockaert:2018:C18-1,
    author = {Bouraoui, Zied  and  Jameel, Shoaib  and  Schockaert, Steven},
    title = {Relation Induction in Word Embeddings Revisited},
    booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
    pages = {1627--1637},
    year = {2018},
    month = {August},
    address = {Santa Fe, New Mexico, USA},
    publisher = {Association for Computational Linguistics},
    url = {http://www.aclweb.org/anthology/C18-1138},
    abstract = {Given a set of instances of some relation, the relation induction task is to predict which other word pairs are likely to be related in the same way. While it is natural to use word embeddings for this task, standard approaches based on vector translations turn out to perform poorly. To address this issue, we propose two probabilistic relation induction models. The first model is based on translations, but uses Gaussians to explicitly model the variability of these translations and to encode soft constraints on the source and target words that may be chosen. In the second model, we use Bayesian linear regression to encode the assumption that there is a linear relationship between the vector representations of related words, which is considerably weaker than the assumption underlying translation based models.}
}

@inproceedings{akbik-blythe-vollgraf:2018:C18-1,
    author = {Akbik, Alan  and  Blythe, Duncan  and  Vollgraf, Roland},
    title = {Contextual String Embeddings for Sequence Labeling},
    booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
    pages = {1638--1649},
    year = {2018},
    month = {August},
    address = {Santa Fe, New Mexico, USA},
    publisher = {Association for Computational Linguistics},
    url = {http://www.aclweb.org/anthology/C18-1139},
    abstract = {Recent advances in language modeling using recurrent neural networks have made it viable to model language as distributions over characters. By learning to predict the next character on the basis of previous characters, such models have been shown to automatically internalize linguistic concepts such as words, sentences, subclauses and even sentiment. In this paper, we propose to leverage the internal states of a trained character language model to produce a novel type of word embedding which we refer to as contextual string embeddings. Our proposed embeddings have the distinct properties that they (a) are trained without any explicit notion of words and thus fundamentally model words as sequences of characters, and (b) are contextualized by their surrounding text, meaning that the same word will have different embeddings depending on its contextual use. We conduct a comparative evaluation against previous embeddings and find that our embeddings are highly useful for downstream tasks: across four classic sequence labeling tasks we consistently outperform the previous state-of-the-art. In particular, we significantly outperform previous work on English and German named entity recognition (NER), allowing us to report new state-of-the-art F1-scores on the CoNLL03 shared task.}
}

@inproceedings{bollegala-bao:2018:C18-1,
    author = {Bollegala, Danushka  and  Bao, Cong},
    title = {Learning Word Meta-Embeddings by Autoencoding},
    booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
    pages = {1650--1661},
    year = {2018},
    month = {August},
    address = {Santa Fe, New Mexico, USA},
    publisher = {Association for Computational Linguistics},
    url = {http://www.aclweb.org/anthology/C18-1140},
    abstract = {Distributed word embeddings have shown superior performances in numerous Natural Language Processing (NLP) tasks. However, their performances vary significantly across different tasks, implying that the word embeddings learnt by those methods capture complementary aspects of lexical semantics. Therefore, we believe that it is important to combine the existing word embeddings to produce more accurate and complete \emph{meta-embeddings} of words. We model the meta-embedding learning problem as an autoencoding problem, where we would like to learn a meta-embedding space that can accurately reconstruct \emph{all} source embeddings simultaneously. Thereby, the meta-embedding space is enforced to capture complementary information in different source embeddings via a coherent common embedding space. We propose three flavours of autoencoded meta-embeddings motivated by different requirements that must be satisfied by a meta-embedding. Our experimental results on a series of benchmark evaluations show that the proposed autoencoded meta-embeddings outperform the existing state-of-the-art meta-embeddings in multiple tasks.}
}

@InProceedings{lee-EtAl:2018:C18-11,
  author    = {Lee, Yang-Yin  and  Yen, Ting-Yu  and  Huang, Hen-Hsen  and  Shiue, Yow-Ting  and  Chen, Hsin-Hsi},
  title     = {{GenSense}: A Generalized Sense Retrofitting Model},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1662--1671},
  abstract  = {With the aid of recently proposed word embedding algorithms, the study of semantic similarity has progressed and advanced rapidly. However, many natural language processing tasks need sense level representation. To address this issue, some researches propose sense embedding learning algorithms. In this paper, we present a generalized model from existing sense retrofitting model. The generalization takes three major components: semantic relations between the senses, the relation strength and the semantic strength. In the experiment, we show that the generalized model can outperform previous approaches in three types of experiment: semantic relatedness, contextual word similarity and semantic difference.},
  url       = {http://www.aclweb.org/anthology/C18-1141}
}

@inproceedings{bahuleyan-EtAl:2018:C18-1,
    author = {Bahuleyan, Hareesh  and  Mou, Lili  and  Vechtomova, Olga  and  Poupart, Pascal},
    title = {Variational Attention for Sequence-to-Sequence Models},
    booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
    pages = {1672--1682},
    year = {2018},
    month = {August},
    address = {Santa Fe, New Mexico, USA},
    publisher = {Association for Computational Linguistics},
    url = {http://www.aclweb.org/anthology/C18-1142},
    abstract = {The variational encoder-decoder (VED) encodes source information as a set of random variables using a neural network, which in turn is decoded into target data using another neural network. In natural language processing, sequence-to-sequence (Seq2Seq) models typically serve as encoder-decoder networks. When combined with a traditional (deterministic) attention mechanism, the variational latent space may be bypassed by the attention model, and thus becomes ineffective. In this paper, we propose a variational attention mechanism for VED, where the attention vector is also modeled as Gaussian distributed random variables. Results on two experiments show that, without loss of quality, our proposed method alleviates the bypassing phenomenon as it increases the diversity of generated sentences.}
}

@InProceedings{wang-patel-jin:2018:C18-1,
  author    = {Wang, Yu  and  Patel, Abhishek  and  Jin, Hongxia},
  title     = {A New Concept of Deep Reinforcement Learning based Augmented General Tagging System},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1683--1693},
  abstract  = {In this paper, a new deep reinforcement learning based augmented general tagging system is proposed. The new system contains two parts: a deep neural network (DNN) based sequence labeling model and a deep reinforcement learning (DRL) based augmented tagger. The augmented tagger helps improve system performance by modeling the data with minority tags. The new system is evaluated on SLU and NLU sequence labeling tasks using ATIS and CoNLL-2003 benchmark datasets, to demonstrate the new system's outstanding performance on general tagging tasks. Evaluated by F1 scores, it shows that the new system outperforms the current state-of-the-art model on ATIS dataset by 1.9\% and that on CoNLL-2003 dataset by 1.4\%.},
  url       = {http://www.aclweb.org/anthology/C18-1143}
}

@InProceedings{felt-EtAl:2018:C18-1,
  author    = {Felt, Paul  and  Ringger, Eric  and  Boyd-Graber, Jordan  and  Seppi, Kevin},
  title     = {Learning from Measurements in Crowdsourcing Models: Inferring Ground Truth from Diverse Annotation Types},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1694--1704},
  abstract  = {Annotated corpora enable supervised machine learning},
  url       = {http://www.aclweb.org/anthology/C18-1144}
}

@InProceedings{kabdolov-assylbekov-takhanov:2018:C18-1,
  author    = {Kabdolov, Olzhas  and  Assylbekov, Zhenisbek  and  Takhanov, Rustem},
  title     = {Reproducing and Regularizing the {SCRN} Model},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1705--1716},
  abstract  = {We reproduce the Structurally Constrained Recurrent Network (SCRN) model, and then regularize it using the existing widespread techniques, such as naive dropout, variational dropout, and weight tying. We show that when regularized and optimized appropriately the SCRN model can achieve performance comparable with the ubiquitous LSTM model in language modeling task on English data, while outperforming it on non-English data.},
  url       = {http://www.aclweb.org/anthology/C18-1145}
}

@InProceedings{song-zhao-liu:2018:C18-1,
  author    = {Song, Kaiqiang  and  Zhao, Lin  and  Liu, Fei},
  title     = {Structure-Infused Copy Mechanisms for Abstractive Summarization},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1717--1729},
  abstract  = {Seq2seq learning has produced promising results on summarization. However, in many cases, system summaries still struggle to keep the meaning of the original intact. They may miss out important words or relations that play critical roles in the syntactic structure of source sentences. In this paper, we present structure-infused copy mechanisms to facilitate copying important words and relations from the source sentence to summary sentence. The approach naturally combines source dependency structure with the copy mechanism of an abstractive sentence summarizer. Experimental results demonstrate the effectiveness of incorporating source-side syntactic information in the system, and our proposed approach compares favorably to state-of-the-art methods.},
  url       = {http://www.aclweb.org/anthology/C18-1146}
}

@InProceedings{vanmiltenburg-elliott-vossen:2018:C18-1,
  author    = {van Miltenburg, Emiel  and  Elliott, Desmond  and  Vossen, Piek},
  title     = {Measuring the Diversity of Automatic Image Descriptions},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1730--1741},
  abstract  = {Automatic image description systems typically produce generic sentences},
  url       = {http://www.aclweb.org/anthology/C18-1147}
}

@InProceedings{higurashi-EtAl:2018:C18-1,
  author    = {Higurashi, Tatsuru  and  Kobayashi, Hayato  and  Masuyama, Takeshi  and  Murao, Kazuma},
  title     = {Extractive Headline Generation Based on Learning to Rank for Community Question Answering},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1742--1753},
  abstract  = {User-generated content such as the questions on community question answering (CQA) forums does not always come with appropriate headlines, in contrast to the news articles used in various headline generation tasks. In such cases, we cannot use paired supervised data, e.g., pairs of articles and headlines, to learn a headline generation model. To overcome this problem, we propose an extractive headline generation method based on learning to rank for CQA that extracts the most informative substring from each question as its headline. Experimental results show that our method outperforms several baselines, including a prefix-based method, which is widely used in real services.},
  url       = {http://www.aclweb.org/anthology/C18-1148}
}

@InProceedings{li-EtAl:2018:C18-12,
  author    = {Li, Qian  and  Li, Ziwei  and  Wei, Jin-Mao  and  Gu, Yanhui  and  Jatowt, Adam  and  Yang, Zhenglu},
  title     = {A Multi-Attention based Neural Network with External Knowledge for Story Ending Predicting Task},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1754--1762},
  abstract  = {Enabling a mechanism to understand a temporal story and predict its ending is an interesting issue that has attracted considerable attention, as in case of the ROC Story Cloze Task (SCT). In this paper, we develop a multi-attention-based neural network (MANN) with well-designed optimizations, like Highway Network, and concatenated features with embedding representations into the hierarchical neural network model. Considering the particulars of the specific task, we thoughtfully extend MANN with external knowledge resources, exceeding state-of-the-art results obviously. Furthermore, we develop a thorough understanding of our model through a careful hand analysis on a subset of the stories. We identify what traits of MANN contribute to its outperformance and how external knowledge is obtained in such an ending prediction task.},
  url       = {http://www.aclweb.org/anthology/C18-1149}
}

@InProceedings{fan-EtAl:2018:C18-1,
  author    = {Fan, Zhihao  and  Wei, Zhongyu  and  Wang, Siyuan  and  Liu, Yang  and  Huang, Xuanjing},
  title     = {A Reinforcement Learning Framework for Natural Question Generation using Bi-discriminators},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1763--1774},
  abstract  = {Visual Question Generation (VQG) aims to ask natural questions about an image automatically. Existing research focus on training model to fit the annotated data set that makes it indifferent from other language generation tasks. We argue that natural questions need to have two specific attributes from the perspectives of content and linguistic respectively, namely, natural and human-written. Inspired by the setting of discriminator in adversarial learning, we propose two discriminators, one for each attribute, to enhance the training. We then use the reinforcement learning framework to incorporate scores from the two discriminators as the reward to guide the training of the question generator. Experimental results on a benchmark VQG dataset show the effectiveness and robustness of our model compared to some state-of-the-art models in terms of both automatic and human evaluation metrics.},
  url       = {http://www.aclweb.org/anthology/C18-1150}
}

@InProceedings{brainskas-havrylov-titov:2018:C18-1,
  author    = {Bra{\v{z}}inskas, Arthur  and  Havrylov, Serhii  and  Titov, Ivan},
  title     = {Embedding Words as Distributions with a {Bayesian} Skip-gram Model},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1775--1789},
  abstract  = {We introduce a method for embedding words as probability densities in a low-dimensional space.},
  url       = {http://www.aclweb.org/anthology/C18-1151}
}

@InProceedings{ettinger-EtAl:2018:C18-1,
  author    = {Ettinger, Allyson  and  Elgohary, Ahmed  and  Phillips, Colin  and  Resnik, Philip},
  title     = {Assessing Composition in Sentence Vector Representations},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1790--1801},
  abstract  = {An important component of achieving language understanding is mastering the composition of sentence meaning, but an immediate challenge to solving this problem is the opacity of sentence vector representations produced by current neural sentence composition models. We present a method to address this challenge, developing tasks that directly target compositional meaning information in sentence vector representations with a high degree of precision and control. To enable the creation of these controlled tasks, we introduce a specialized sentence generation system that produces large, annotated sentence sets meeting specified syntactic, semantic and lexical constraints. We describe the details of the method and generation system, and then present results of experiments applying our method to probe for compositional information in embeddings from a number of existing sentence composition models. We find that the method is able to extract useful information about the differing capacities of these models, and we discuss the implications of our results with respect to these systems' capturing of sentence information. We make available for public use the datasets used for these experiments, as well as the generation system.},
  url       = {http://www.aclweb.org/anthology/C18-1152}
}

@InProceedings{zhang-huang-zhao:2018:C18-1,
  author    = {Zhang, Zhuosheng  and  Huang, Yafang  and  Zhao, Hai},
  title     = {Subword-augmented Embedding for Cloze Reading Comprehension},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1802--1814},
  abstract  = {Representation learning is the foundation of machine reading comprehension. In state-of-the-art models, deep learning methods broadly use word and character level representations. However, character is not naturally the minimal linguistic unit. In addition, with a simple concatenation of character and word embedding, previous models actually give suboptimal solution. In this paper, we propose to use subword rather than character for word embedding enhancement. We also empirically explore different augmentation strategies on subword-augmented embedding to enhance the cloze-style reading comprehension model (reader). In detail, we present a reader that uses subword-level representation to augment word embedding with a short list to handle rare words effectively. A thorough examination is conducted to evaluate the comprehensive performance and generalization ability of the proposed reader. Experimental results show that the proposed approach helps the reader significantly outperform the state-of-the-art baselines on various public datasets.},
  url       = {http://www.aclweb.org/anthology/C18-1153}
}

@InProceedings{chen-ling-zhu:2018:C18-1,
  author    = {Chen, Qian  and  Ling, Zhen-Hua  and  Zhu, Xiaodan},
  title     = {Enhancing Sentence Embedding with Generalized Pooling},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1815--1826},
  abstract  = {Pooling is an essential component of a wide variety of sentence representation and embedding models. This paper explores generalized pooling methods to enhance sentence embedding. We propose vector-based multi-head attention that includes the widely used max pooling, mean pooling, and scalar self-attention as special cases. The model benefits from properly designed penalization terms to reduce redundancy in multi-head attention. We evaluate the proposed model on three different tasks: natural language inference (NLI), author profiling, and sentiment classification. The experiments show that the proposed model achieves significant improvement over strong sentence-encoding-based methods, resulting in state-of-the-art performances on four datasets. The proposed approach can be easily implemented for more problems than we discuss in this paper.},
  url       = {http://www.aclweb.org/anthology/C18-1154}
}

@InProceedings{ponkiya-EtAl:2018:C18-1,
  author    = {Ponkiya, Girishkumar  and  Patel, Kevin  and  Bhattacharyya, Pushpak  and  Palshikar, Girish},
  title     = {Treat us like the sequences we are: Prepositional Paraphrasing of Noun Compounds using {LSTM}},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1827--1836},
  abstract  = {Interpreting noun compounds is a challenging task. It involves uncovering the underlying predicate which is dropped in the formation of the compound. In most cases, this predicate is of the form VERB+PREP. It has been observed that uncovering the preposition is a significant step towards uncovering the predicate.},
  url       = {http://www.aclweb.org/anthology/C18-1155}
}

@InProceedings{hazarika-EtAl:2018:C18-1,
  author    = {Hazarika, Devamanyu  and  Poria, Soujanya  and  Gorantla, Sruthi  and  Cambria, Erik  and  Zimmermann, Roger  and  Mihalcea, Rada},
  title     = {{CASCADE}: Contextual Sarcasm Detection in Online Discussion Forums},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1837--1848},
  abstract  = {The literature in automated sarcasm detection has mainly focused on lexical-, syntactic- and semantic-level analysis of text. However, a sarcastic sentence can be expressed with contextual presumptions, background and commonsense knowledge. In this paper, we propose a ContextuAl SarCasm DEtector (CASCADE), which adopts a hybrid approach of both content- and context-driven modeling for sarcasm detection in online social media discussions. For the latter, CASCADE aims at extracting contextual information from the discourse of a discussion thread. Also, since the sarcastic nature and form of expression can vary from person to person, CASCADE utilizes user embeddings that encode stylometric and personality features of users. When used along with content-based feature extractors such as convolutional neural networks, we see a significant boost in the classification performance on a large Reddit corpus.},
  url       = {http://www.aclweb.org/anthology/C18-1156}
}

@InProceedings{cattle-ma:2018:C18-1,
  author    = {Cattle, Andrew  and  Ma, Xiaojuan},
  title     = {Recognizing Humour using Word Associations and Humour Anchor Extraction},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1849--1858},
  abstract  = {This paper attempts to marry the interpretability of statistical machine learning approaches with the more robust models of joke structure and joke semantics capable of being learned by neural models. Specifically, we explore the use of semantic relatedness features based on word associations, rather than the more common Word2Vec similarity, on a binary humour identification task and identify several factors that make word associations a better fit for humour. We also explore the effects of using joke structure, in the form of humour anchors (Yang et al., 2015), for improving the performance of semantic features and show that, while an intriguing idea, humour anchors contain several pitfalls that can hurt performance.},
  url       = {http://www.aclweb.org/anthology/C18-1157}
}

@InProceedings{hanselowski-EtAl:2018:C18-1,
  author    = {Hanselowski, Andreas  and  PVS, Avinesh  and  Schiller, Benjamin  and  Caspelherr, Felix  and  Chaudhuri, Debanjan  and  Meyer, Christian M.  and  Gurevych, Iryna},
  title     = {A Retrospective Analysis of the Fake News Challenge Stance-Detection Task},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1859--1874},
  abstract  = {The 2017 Fake News Challenge Stage 1 (FNC-1) shared task addressed a stance classification task as a crucial first step towards detecting fake news. To date, there is no in-depth analysis paper to critically discuss FNC-1’s experimental setup, reproduce the results, and draw conclusions for next-generation stance classification methods. In this paper, we provide such an in-depth analysis for the three top-performing systems. We first find that FNC-1’s proposed evaluation metric favors the majority class, which can be easily classified, and thus overestimates the true discriminative power of the methods. Therefore, we propose a new F1-based metric yielding a changed system ranking. Next, we compare the features and architectures used, which leads to a novel feature-rich stacked LSTM model that performs on par with the best systems, but is superior in predicting minority classes. To understand the methods’ ability to generalize, we derive a new dataset and perform both in-domain and cross-domain experiments. Our qualitative and quantitative study helps interpreting the original FNC-1 scores and understand which features help improving performance and why. Our new dataset and all source code used during the reproduction study are publicly available for future research.},
  url       = {http://www.aclweb.org/anthology/C18-1158}
}

@InProceedings{liu-zhang-song:2018:C18-1,
  author    = {Liu, Lizhen  and  Zhang, Donghai  and  Song, Wei},
  title     = {Exploiting Syntactic Structures for Humor Recognition},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1875--1883},
  abstract  = {Humor recognition is an interesting and challenging task in natural language processing. This paper proposes to exploit syntactic structure features to enhance humor recognition. Our method achieves significant improvements compared with humor theory driven baselines. We found that some syntactic structure features consistently correlate with humor, which indicate interesting linguistic phenomena. Both the experimental results and the analysis demonstrate that humor},
  url       = {http://www.aclweb.org/anthology/C18-1159}
}

@InProceedings{you-qian-liu:2018:C18-1,
  author    = {You, Zhenni  and  Qian, Tieyun  and  Liu, Bing},
  title     = {An Attribute Enhanced Domain Adaptive Model for Cold-Start Spam Review Detection},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1884--1895},
  abstract  = {Spam detection has long been a research topic in both academic and industry due to its wide applications. Previous studies are mainly focused on extracting linguistic or behavior features to distinguish the spam and legitimate reviews. Such features are either ineffective or take long time to collect and thus are hard to be applied to cold-start spam review detection tasks. Recent advance leveraged the neural network to encode the textual and behavior features for the cold-start problem. However, the abundant attribute information are largely neglected by the existing framework.},
  url       = {http://www.aclweb.org/anthology/C18-1160}
}

@InProceedings{ghaddar-langlais:2018:C18-1,
  author    = {Ghaddar, Abbas  and  Langlais, Phillippe},
  title     = {Robust Lexical Features for Improved Neural Network Named-Entity Recognition},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1896--1907},
  abstract  = {Neural network approaches to Named-Entity Recognition reduce the need for carefully hand-crafted features. While some features do remain in state-of-the-art systems, lexical features have been mostly discarded, with the exception of gazetteers. In this work, we show that this is unfair: lexical features are actually quite useful. We propose to embed words and entity types into a low-dimensional vector space we train from annotated data produced by distant supervision thanks to Wikipedia. From this, we compute — offline — a feature vector representing each word. When used with a vanilla recurrent neural network model, this representation yields substantial improvements. We establish a new state-of-the-art F1 score of 87.95 on ONTONOTES 5.0, while matching state-of-the-art performance with a F1 score of 91.73 on the over-studied CONLL-2003 dataset.},
  url       = {http://www.aclweb.org/anthology/C18-1161}
}

@InProceedings{li-yang:2018:C18-1,
  author    = {Li, Ximing  and  Yang, Bo},
  title     = {A Pseudo Label based Dataless Naive {Bayes} Algorithm for Text Classification with Seed Words},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1908--1917},
  abstract  = {Traditional supervised text classifiers require a large number of manually labeled documents, which are often expensive to obtain. Recently, dataless text classification has attracted more attention, since it only requires very few seed words of categories that are much cheaper. In this paper, we develop a pseudo-label based dataless Naive Bayes (PL-DNB) classifier with seed words. We initialize pseudo-labels for each document using seed word occurrences, and employ the expectation maximization algorithm to train PL-DNB in a semi-supervised manner. The pseudo-labels are iteratively updated using a mixture of seed word occurrences and estimations of label posteriors. To avoid noisy pseudo-labels, we also consider the information of nearest neighboring documents in the pseudo-label update step, i.e., preserving local neighborhood structure of documents. We empirically show that PL-DNB outperforms traditional dataless text classification algorithms with seed words. Especially, PL-DNB performs well on the imbalanced dataset.},
  url       = {http://www.aclweb.org/anthology/C18-1162}
}

@InProceedings{shimizu-rong-miyazaki:2018:C18-1,
  author    = {Shimizu, Nobuyuki  and  Rong, Na  and  Miyazaki, Takashi},
  title     = {Visual Question Answering Dataset for Bilingual Image Understanding: A Study of Cross-Lingual Transfer Using Attention Maps},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1918--1928},
  abstract  = {Visual question answering (VQA) is a challenging task that requires a computer system to understand both a question and an image. While there is much research on VQA in English, there is a lack of datasets for other languages, and English annotation is not directly applicable in those languages. To deal with this, we have created a Japanese VQA dataset by using crowdsourced annotation with images from the Visual Genome dataset. This is the first such dataset in Japanese. As another contribution, we propose a cross-lingual method for making use of English annotation to improve a Japanese VQA system. The proposed method is based on a popular VQA method that uses an attention mechanism. We use attention maps generated from English questions to help improve the Japanese VQA task. The proposed method experimentally performed better than simply using a monolingual corpus, which demonstrates the effectiveness of using attention maps to transfer cross-lingual information.},
  url       = {http://www.aclweb.org/anthology/C18-1163}
}

@InProceedings{baumann-hussein-meyersickendiek:2018:C18-1,
  author    = {Baumann, Timo  and  Hussein, Hussein  and  Meyer-Sickendiek, Burkhard},
  title     = {Style Detection for Free Verse Poetry from Text and Speech},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1929--1940},
  abstract  = {Modern and post-modern free verse poems feature a large and complex variety in their poetic prosodies that falls along a continuum from a more fluent to a more disfluent and choppy style. As the poets of modernism overcame rhyme and meter, they oriented themselves in these two opposing directions, creating a free verse spectrum that calls for new analyses of prosodic forms. We present a method, grounded in philological analysis and current research on cognitive (dis)fluency, for automatically analyzing this spectrum. We define and relate six classes of poetic styles (ranging from parlando to lettristic decomposition) by their gradual differentiation. Based on this discussion, we present a model for automatic prosodic classification of spoken free verse poetry that uses deep hierarchical attention networks to integrate the source text and audio and predict the assigned class. We evaluate our model on a large corpus of German author-read post-modern poetry and find that classes can reliably be differentiated, reaching a weighted f-measure of 0.73, when combining textual and phonetic evidence. In our further analyses, we validate the model’s decision-making process, the philologically hypothesized continuum of fluency and investigate the relative importance of various features.},
  url       = {http://www.aclweb.org/anthology/C18-1164}
}

@InProceedings{wang-EtAl:2018:C18-15,
  author    = {Wang, Hao  and  Zhang, Xiaodong  and  Ma, Shuming  and  Sun, Xu  and  Wang, Houfeng  and  Wang, Mengxiang},
  title     = {A Neural Question Answering Model Based on Semi-Structured Tables},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1941--1951},
  abstract  = {Most question answering (QA) systems are based on raw text and structured knowledge graph. However, raw text corpora are hard for QA system to understand, and structured knowledge graph needs intensive manual work, while it is relatively easy to obtain semi-structured tables from many sources directly, or build them automatically. In this paper, we build an end-to-end system to answer multiple choice questions with semi-structured tables as its knowledge. Our system answers queries by two steps. First, it finds the most similar tables. Then the system measures the relevance between each question and candidate table cells, and choose the most related cell as the source of answer. The system is evaluated with TabMCQ dataset, and gets a huge improvement compared to the state of the art.},
  url       = {http://www.aclweb.org/anthology/C18-1165}
}

@InProceedings{liu-EtAl:2018:C18-11,
  author    = {Liu, Xin  and  Chen, Qingcai  and  Deng, Chong  and  Zeng, Huajun  and  Chen, Jing  and  Li, Dongfang  and  Tang, Buzhou},
  title     = {{LCQMC}: A Large-scale {Chinese} Question Matching Corpus},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1952--1962},
  abstract  = {The lack of large-scale question matching corpora greatly limits the development of matching methods in question answering (QA) system, especially for non-English languages. To ameliorate this situation, in this paper, we introduce a large-scale Chinese question matching corpus (named LCQMC), which is released to the public. LCQMC is more general than paraphrase corpus as it focuses on intent matching rather than paraphrase. How to collect a large number of question pairs in variant linguistic forms, which may present the same intent, is the key point for such corpus construction. In this paper, we first use a search engine to collect large-scale question pairs related to high-frequency words from various domains, then filter irrelevant pairs by the Wasserstein distance, and finally recruit three annotators to manually check the left pairs. After this process, a question matching corpus that contains 260,068 question pairs is constructed. In order to verify the LCQMC corpus, we split it into three parts, i.e., a training set containing 238,766 question pairs, a development set with 8,802 question pairs, and a test set with 12,500 question pairs, and test several well-known sentence matching methods on it. The experimental results not only demonstrate the good quality of LCQMC but also provide solid baseline performance for further researches on this corpus.},
  url       = {http://www.aclweb.org/anthology/C18-1166}
}

@InProceedings{worsham-kalita:2018:C18-1,
  author    = {Worsham, Joseph  and  Kalita, Jugal},
  title     = {Genre Identification and the Compositional Effect of Genre in Literature},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1963--1973},
  abstract  = {Recent advances in Natural Language Processing are finding ways to place an emphasis on the hierarchical nature of text instead of representing language as a flat sequence or unordered collection of words or letters. A human reader must capture multiple levels of abstraction and meaning in order to formulate an understanding of a document. In this paper, we address the problem of developing approaches which are capable of working with extremely large and complex literary documents to perform Genre Identification. The task is to assign the literary classification to a full-length book belonging to a corpus of literature, where the works on average are well over 200,000 words long and genre is an abstract thematic concept. We introduce the Gutenberg Dataset for Genre Identification. Additionally, we present a study on how current deep learning models compare to traditional methods for this task. The results are presented as a baseline along with findings on how using an ensemble of chapters can significantly improve results in deep learning methods. The motivation behind the ensemble of chapters method is discussed as the compositionality of subtexts which make up a larger work and contribute to the overall genre.},
  url       = {http://www.aclweb.org/anthology/C18-1167}
}

@InProceedings{rodriguez-caldwell-liu:2018:C18-1,
  author    = {Rodriguez, Juan Diego  and  Caldwell, Adam  and  Liu, Alexander},
  title     = {Transfer Learning for Entity Recognition of Novel Classes},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1974--1985},
  abstract  = {In this reproduction paper, we replicate and extend several past studies on transfer learning for entity recognition. In particular, we are interested in entity recognition problems where the class labels in the source and target domains are different. Our work is the first direct comparison of these previously published approaches in this problem setting. In addition, we perform experiments on seven new source/target corpus pairs, nearly doubling the total number of corpus pairs that have been studied in all past work combined. Our results empirically demonstrate when each of the published approaches tends to do well. In particular, simpler approaches often work best when there is very little labeled target data, while neural transfer approaches tend to do better},
  url       = {http://www.aclweb.org/anthology/C18-1168}
}

@InProceedings{alolimat-EtAl:2018:C18-12,
  author    = {Al-Olimat, Hussein  and  Thirunarayan, Krishnaprasad  and  Shalin, Valerie  and  Sheth, Amit},
  title     = {Location Name Extraction from Targeted Text Streams using Gazetteer-based Statistical Language Models},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1986--1997},
  abstract  = {Extracting location names from informal and unstructured social media data requires the identification of referent boundaries and partitioning compound names. Variability, particularly systematic variability in location names (Carroll, 1983), challenges the identification task. Some of this variability can be anticipated as operations within a statistical language model, in this case drawn from gazetteers such as OpenStreetMap (OSM), Geonames, and DBpedia. This permits evaluation of an observed n-gram in Twitter targeted text as a legitimate location name variant from the same location-context. Using n-gram statistics and location-related dictionaries, our Location Name Extraction tool (LNEx) handles abbreviations and automatically filters and augments the location names in gazetteers (handling name contractions and auxiliary contents) to help detect the boundaries of multi-word location names and thereby delimit them in texts.},
  url       = {http://www.aclweb.org/anthology/C18-1169}
}

@InProceedings{wang-EtAl:2018:C18-16,
  author    = {Wang, Yue  and  Zhang, Richong  and  Xu, Cheng  and  Mao, Yongyi},
  title     = {The APVA-TURBO Approach To Question Answering in Knowledge Base},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1998--2009},
  abstract  = {In this paper, we study the problem of question answering over knowledge base. We identify that the primary bottleneck in this problem is the difficulty in accurately predicting the relations connecting the subject entity to the object entities. We advocate a new model architecture, APVA, which includes a verification mechanism responsible for checking the correctness of predicted relations. The APVA framework naturally supports a well-principled iterative training procedure, which we call turbo training. We demonstrate via experiments that the APVA-TURBO approach drastically improves the question answering performance.},
  url       = {http://www.aclweb.org/anthology/C18-1170}
}

@InProceedings{zhou-huang-zhu:2018:C18-1,
  author    = {Zhou, Mantong  and  Huang, Minlie  and  Zhu, Xiaoyan},
  title     = {An Interpretable Reasoning Network for Multi-Relation Question Answering},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2010--2022},
  abstract  = {Multi-relation Question Answering is a challenging task, due to the requirement of elaborated analysis on questions and reasoning over multiple fact triples in knowledge base. In this paper, we present a novel model called Interpretable Reasoning Network that employs an interpretable, hop-by-hop reasoning process for question answering. The model dynamically decides which part of an input question should be analyzed at each hop; predicts a relation that corresponds to the current parsed results; utilizes the predicted relation to update the question representation and the state of the reasoning process; and then drives the next-hop reasoning. Experiments show that our model yields state-of-the-art results on two datasets. More interestingly, the model can offer traceable and observable intermediate predictions for reasoning analysis and failure diagnosis, thereby allowing manual manipulation in predicting the final answer.},
  url       = {http://www.aclweb.org/anthology/C18-1171}
}

@InProceedings{liu-EtAl:2018:C18-12,
  author    = {Liu, Qian  and  Huang, Heyan  and  Gao, Yang  and  Wei, Xiaochi  and  Tian, Yuxin  and  Liu, Luyang},
  title     = {Task-oriented Word Embedding for Text Classification},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2023--2032},
  abstract  = {Distributed word representation plays a pivotal role in various natural language processing tasks. In spite of its success, most existing methods only consider contextual information, which is suboptimal when used in various tasks due to a lack of task-specific features. The rational word embeddings should have the ability to capture both the semantic features and task-specific features of words. In this paper, we propose a task-oriented word embedding method and apply it to the text classification task. With the function-aware component, our method regularizes the distribution of words to enable the embedding space to have a clear classification boundary. We evaluate our method using five text classification datasets. The experiment results show that our method significantly outperforms the state-of-the-art methods.},
  url       = {http://www.aclweb.org/anthology/C18-1172}
}

@InProceedings{zhao-EtAl:2018:C18-1,
  author    = {Zhao, Jianyu  and  Zhan, Zhiqiang  and  Yang, Qichuan  and  Zhang, Yang  and  Hu, Changjian  and  Li, Zhensheng  and  Zhang, Liuxin  and  He, Zhiqiang},
  title     = {Adaptive Learning of Local Semantic and Global Structure Representations for Text Classification},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2033--2043},
  abstract  = {Representation learning is a key issue for most Natural Language Processing (NLP) tasks. Most existing representation models either learn little structure information or just rely on pre-defined structures, leading to degradation of performance and generalization capability.},
  url       = {http://www.aclweb.org/anthology/C18-1173}
}

@InProceedings{fell-EtAl:2018:C18-1,
  author    = {Fell, Michael  and  Nechaev, Yaroslav  and  Cabrio, Elena  and  Gandon, Fabien},
  title     = {Lyrics Segmentation: Textual Macrostructure Detection using Convolutions},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2044--2054},
  abstract  = {Lyrics contain repeated patterns that are correlated with the repetitions found in the music they accompany. Repetitions in song texts have been shown to enable lyrics segmentation -- a fundamental prerequisite of automatically detecting the building blocks (e.g. chorus, verse) of a song text. In this article we improve on the state-of-the-art in lyrics segmentation by applying a convolutional neural network to the task, and experiment with novel features as a step towards deeper macrostructure detection of lyrics.},
  url       = {http://www.aclweb.org/anthology/C18-1174}
}

@InProceedings{xiao-EtAl:2018:C18-1,
  author    = {Xiao, Liqiang  and  Zhang, Honglun  and  Chen, Wenqing  and  Wang, Yongkun  and  Jin, Yaohui},
  title     = {Learning What to Share: Leaky Multi-Task Network for Text Classification},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2055--2065},
  abstract  = {Neural network based multi-task learning has achieved great success on many NLP problems, which focuses on sharing knowledge among tasks by linking some layers to enhance the performance. However, most existing approaches suffer from the interference between tasks because they lack of selection mechanism for feature sharing. In this way, the feature spaces of tasks may be easily contaminated by helpless features borrowed from others, which will confuse the models for making correct prediction. In this paper, we propose a multi-task convolutional neural network with the Leaky Unit, which has memory and forgetting mechanism to filter the feature flows between tasks. Experiments on five different datasets for text classification validate the benefits of our approach.},
  url       = {http://www.aclweb.org/anthology/C18-1175}
}

@InProceedings{levy-EtAl:2018:C18-1,
  author    = {Levy, Ran  and  Bogin, Ben  and  Gretz, Shai  and  Aharonov, Ranit  and  Slonim, Noam},
  title     = {Towards an argumentative content search engine using weak supervision},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2066--2081},
  abstract  = {Searching for sentences containing claims in a large text corpus is a key component in developing an argumentative content search engine. Previous works focused on detecting claims in a small set of documents or within documents enriched with argumentative content. However, pinpointing relevant claims in massive unstructured corpora received little attention. A step in this direction was taken in (Levy et al. 2017), where the authors suggested using a weak signal to develop a relatively strict query for claim--sentence detection. Here, we leverage this work to define weak signals for training DNNs to obtain significantly greater performance. This approach allows to relax the query and increase the potential coverage.},
  url       = {http://www.aclweb.org/anthology/C18-1176}
}

@InProceedings{gungor-uskudarli-gungor:2018:C18-1,
  author    = {Gungor, Onur  and  Uskudarli, Suzan  and  Gungor, Tunga},
  title     = {Improving Named Entity Recognition by Jointly Learning to Disambiguate Morphological Tags},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2082--2092},
  abstract  = {Previous studies have shown that linguistic features of a word such as possession, genitive or other grammatical cases can be employed in word representations of a named entity recognition (NER) tagger to improve the performance for morphologically rich languages. However, these taggers require external morphological disambiguation (MD) tools to function which are hard to obtain or non-existent for many languages. In this work, we propose a model which alleviates the need for such disambiguators by jointly learning NER and MD taggers in languages for which one can provide a list of candidate morphological analyses. We show that this can be done independent of the morphological annotation schemes, which differ among languages. Our experiments employing three different model architectures that join these two tasks show that joint learning improves NER performance. Furthermore, the morphological disambiguator's performance is shown to be competitive.},
  url       = {http://www.aclweb.org/anthology/C18-1177}
}

@InProceedings{azmy-EtAl:2018:C18-1,
  author    = {Azmy, Michael  and  Shi, Peng  and  Lin, Jimmy  and  Ilyas, Ihab},
  title     = {Farewell Freebase: Migrating the SimpleQuestions Dataset to DBpedia},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2093--2103},
  abstract  = {Question answering over knowledge graphs is an important problem of interest both commercially and academically. There is substantial interest in the class of natural language questions that can be answered via the lookup of a single fact, driven by the availability of the popular SimpleQuestions dataset. The problem with this dataset, however, is that answer triples are provided from Freebase, which has been defunct for several years. As a result, it is difficult to build ``real-world'' question answering systems that are operationally deployable. Furthermore, a defunct knowledge graph means that much of the infrastructure for querying, browsing, and manipulating triples no longer exists. To address this problem, we present SimpleDBpediaQA, a new benchmark dataset for simple question answering over knowledge graphs that was created by mapping SimpleQuestions entities and predicates from Freebase to DBpedia. Although this mapping is conceptually straightforward, there are a number of nuances that make the task non-trivial, owing to the different conceptual organizations of the two knowledge graphs. To lay the foundation for future research using this dataset, we leverage recent work to provide simple yet strong baselines with and without neural networks.},
  url       = {http://www.aclweb.org/anthology/C18-1178}
}

@InProceedings{bostan-klinger:2018:C18-1,
  author    = {Bostan, Laura Ana Maria  and  Klinger, Roman},
  title     = {An Analysis of Annotated Corpora for Emotion Classification in Text},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2104--2119},
  abstract  = {Several datasets have been annotated and published for classification of emotions. They differ in several ways: (1) the use of different annotation schemata (e. g., discrete label sets, including joy, anger, fear, or sadness or continuous values including valence, or arousal), (2) the domain, and, (3) the file formats. This leads to several research gaps: supervised models often only use a limited set of available resources. Additionally, no previous work has compared emotion corpora in a systematic manner. We aim at contributing to this situation with a survey of the datasets, and aggregate them in a common file format with a common annotation schema. Based on this aggregation, we perform the first cross-corpus classification experiments in the spirit of future research enabled by this paper, in order to gain insight and a better understanding of differences of models inferred from the data. This work also simplifies the choice of the most appropriate resources for developing a model for a novel domain. One result from our analysis is that a subset of corpora is better classified with models trained on a different corpus. For none of the corpora, training on all data altogether is better than using a subselection of the resources. Our unified corpus is available at http://www.ims.uni-stuttgart.de/data/unifyemotion.},
  url       = {http://www.aclweb.org/anthology/C18-1179}
}

@InProceedings{sachan-zaheer-salakhutdinov:2018:C18-1,
  author    = {Sachan, Devendra  and  Zaheer, Manzil  and  Salakhutdinov, Ruslan},
  title     = {Investigating the Working of Text Classifiers},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2120--2131},
  abstract  = {Text classification is one of the most widely studied tasks in natural language processing. Motivated by the principle of compositionality, large multilayer neural network models have been employed for this task in an attempt to effectively utilize the constituent expressions. Almost all of the reported work train large networks using discriminative approaches, which come with a caveat of no proper capacity control, as they tend to latch on to any signal that may not generalize. Using various recent state-of-the-art approaches for text classification, we explore whether these models actually learn to compose the meaning of the sentences or still just focus on some keywords or lexicons for classifying the document. To test our hypothesis, we carefully construct datasets where the training and test splits have no direct overlap of such lexicons, but overall language structure would be similar. We study various text classifiers and observe that there is a big performance drop on these datasets. Finally, we show that even simple models with our proposed regularization techniques, which disincentivize focusing on key lexicons, can substantially improve classification accuracy.},
  url       = {http://www.aclweb.org/anthology/C18-1180}
}

@InProceedings{lai-bui-li:2018:C18-1,
  author    = {Lai, Tuan Manh  and  Bui, Trung  and  Li, Sheng},
  title     = {A Review on Deep Learning Techniques Applied to Answer Selection},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2132--2144},
  abstract  = {Given a question and a set of candidate answers, answer selection is the task of identifying which of the candidates answers the question correctly. It is an important problem in natural language processing, with applications in many areas. Recently, many deep learning based methods have been proposed for the task. They produce impressive performance without relying on any feature engineering or expensive external resources. In this paper, we aim to provide a comprehensive review on deep learning methods applied to answer selection.},
  url       = {http://www.aclweb.org/anthology/C18-1181}
}

@InProceedings{yadav-bethard:2018:C18-1,
  author    = {Yadav, Vikas  and  Bethard, Steven},
  title     = {A Survey on Recent Advances in Named Entity Recognition from Deep Learning models},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2145--2158},
  abstract  = {Named Entity Recognition (NER) is a key component in NLP systems for question answering, information retrieval, relation extraction, etc. NER systems have been studied and developed widely for decades, but accurate systems using deep neural networks (NN) have only been introduced in the last few years. We present a comprehensive survey of deep neural network architectures for NER, and contrast them with previous approaches to NER based on feature engineering and other supervised or semi-supervised learning algorithms. Our results highlight the improvements achieved by neural networks, and show how incorporating some of the lessons learned from past work on feature-based NER systems can yield further improvements.},
  url       = {http://www.aclweb.org/anthology/C18-1182}
}

@InProceedings{yang-EtAl:2018:C18-12,
  author    = {Yang, Yaosheng  and  Chen, Wenliang  and  Li, Zhenghua  and  He, Zhengqiu  and  Zhang, Min},
  title     = {Distantly Supervised NER with Partial Annotation Learning and Reinforcement Learning},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2159--2169},
  abstract  = {A bottleneck problem with Chinese named entity recognition (NER) in new domains is the lack of annotated data. One solution is to utilize the method of distant supervision, which has been widely used in relation extraction, to automatically populate annotated training data without human cost. The distant supervision assumption here is that if a string in text is included in a predefined dictionary of entities, the string might be an entity. However, this kind of auto-generated data suffers from two main problems: incomplete and noisy annotations, which affect the performance of NER models. In this paper, we propose a novel approach which can partially solve the above problems of distant supervision for NER. In our approach, to handle the incomplete problem, we apply partial annotation learning to reduce the effect of unknown labels of characters. As for noisy annotation, we design an instance selector based on reinforcement learning to distinguish positive sentences from auto-generated annotations. In experiments, we create two datasets for Chinese named entity recognition in two domains with the help of distant supervision. The experimental results show that the proposed approach obtains better performance than the comparison systems on both two datasets.},
  url       = {http://www.aclweb.org/anthology/C18-1183}
}

@InProceedings{shahbazi-EtAl:2018:C18-1,
  author    = {Shahbazi, Hamed  and  Fern, Xiaoli  and  Ghaeini, Reza  and  Ma, Chao  and  Obeidat, Rasha Mohammad  and  Tadepalli, Prasad},
  title     = {Joint Neural Entity Disambiguation with Output Space Search},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2170--2180},
  abstract  = {In this paper, we present a novel model for entity disambiguation that combines both local contextual information and global evidences through Limited Discrepancy Search (LDS). Given an input document, we start from a complete solution constructed by a local model and conduct a search in the space of possible corrections to improve the local solution from a global view point. Our search utilizes a heuristic function to focus more on the least confident local decisions and a pruning function to score the global solutions based on their local fitness and the global coherences among the predicted entities. Experimental results on CoNLL 2003 and TAC 2010 benchmarks verify the effectiveness of our model.},
  url       = {http://www.aclweb.org/anthology/C18-1184}
}

@InProceedings{chen-moschitti:2018:C18-1,
  author    = {Chen, Lingzhen  and  Moschitti, Alessandro},
  title     = {Learning to Progressively Recognize New Named Entities with Sequence to Sequence Models},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2181--2191},
  abstract  = {In this paper, we propose to use a sequence to sequence model for Named Entity Recognition (NER) and we explore the effectiveness of such model in a progressive NER setting -- a Transfer Learning (TL) setting. We train an initial model on source data and transfer it to a model that can recognize new NE categories in the target data during a subsequent step, when the source data is no longer available. Our solution consists in: (i) to reshape and re-parametrize the output layer of the first learned model to enable the recognition of new NEs; (ii) to leave the rest of the architecture unchanged, such that it is initialized with parameters transferred from the initial model; and (iii) to fine tune the network on the target data. Most importantly, we design a new NER approach based on sequence to sequence (Seq2Seq) models, which can intuitively work better in our progressive setting. We compare our approach with a Bidirectional LSTM, which is a strong neural NER model. Our experiments show that the Seq2Seq model performs very well on the standard NER setting and it is more robust in the progressive setting. Our approach can recognize previously unseen NE categories while preserving the knowledge of the seen data.},
  url       = {http://www.aclweb.org/anthology/C18-1185}
}

@InProceedings{yu-lam-wang:2018:C18-1,
  author    = {Yu, Qian  and  Lam, Wai  and  Wang, Zihao},
  title     = {Responding E-commerce Product Questions via Exploiting QA Collections and Reviews},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2192--2203},
  abstract  = {Providing instant responses for product questions in E-commerce sites can significantly improve satisfaction of potential consumers. We propose a new framework for automatically responding product questions newly posed by users via exploiting existing QA collections and review collections in a coordinated manner. Our framework can return a ranked list of snippets serving as the automated response for a given question, where each snippet can be a sentence from reviews or an existing question-answer pair. One major subtask in our framework is question-based response review ranking. Learning for response review ranking is challenging since there is no labeled response review available. The collection of existing QA pairs are exploited as distant supervision for learning to rank responses. With proposed distant supervision paradigm, the learned response ranking model makes use of the knowledge in the QA pairs and the corresponding retrieved review lists. Extensive experiments on datasets collected from a real-world commercial E-commerce site demonstrate the effectiveness of our proposed framework.},
  url       = {http://www.aclweb.org/anthology/C18-1186}
}

@InProceedings{khosla-chhaya-chawla:2018:C18-1,
  author    = {Khosla, Sopan  and  Chhaya, Niyati  and  Chawla, Kushal},
  title     = {Aff2Vec: Affect--Enriched Distributional Word Representations},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2204--2218},
  abstract  = {Human communication includes information, opinions and reactions. Reactions are often captured by the affective-messages in written as well as verbal communications. While there has been work in affect modeling and to some extent affective content generation, the area of affective word distributions is not well studied. Synsets and lexica capture semantic relationships across words. These models, however, lack in encoding affective or emotional word interpretations. Our proposed model, Aff2Vec, provides a method for enriched word embeddings that are representative of affective interpretations of words. Aff2Vec outperforms the state-of-the-art in intrinsic word-similarity tasks. Further, the use of Aff2Vec representations outperforms baseline embeddings in downstream natural language understanding tasks including sentiment analysis, personality detection, and frustration prediction.},
  url       = {http://www.aclweb.org/anthology/C18-1187}
}

@InProceedings{kunneman-EtAl:2018:C18-1,
  author    = {Kunneman, Florian  and  Wubben, Sander  and  van den Bosch, Antal  and  Krahmer, Emiel},
  title     = {Aspect-based summarization of pros and cons in unstructured product reviews},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2219--2229},
  abstract  = {We developed three systems for generating pros and cons summaries of product reviews. Automating this task eases the writing of product reviews, and offers readers quick access to the most important information. We compared SynPat, a system based on syntactic phrases selected on the basis of valence scores, against a neural-network-based system trained to map bag-of-words representations of reviews directly to pros and cons, and the same neural system trained on clusters of word-embedding encodings of similar pros and cons. We evaluated the systems in two ways: first on held-out reviews with gold-standard pros and cons, and second by asking human annotators to rate the systems' output on relevance and completeness. In the second evaluation, the gold-standard pros and cons were assessed along with the system output. We find that the human-generated summaries are not deemed as significantly more relevant or complete than the SynPat systems; the latter are scored higher than the human-generated summaries on a precision metric. The neural approaches yield a lower performance in the human assessment, and are outperformed by the baseline.},
  url       = {http://www.aclweb.org/anthology/C18-1188}
}

@InProceedings{toledoronen-EtAl:2018:C18-1,
  author    = {Toledo-Ronen, Orith  and  Bar-Haim, Roy  and  Halfon, Alon  and  Jochim, Charles  and  Menczel, Amir  and  Aharonov, Ranit  and  Slonim, Noam},
  title     = {Learning Sentiment Composition from Sentiment Lexicons},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2230--2241},
  abstract  = {Sentiment composition is a fundamental sentiment analysis problem. Previous work relied on manual rules and manually-created lexical resources such as negator lists, or learned a composition function from sentiment-annotated phrases or sentences. We propose a new approach for learning sentiment composition from a large, unlabeled corpus, which only requires a word-level sentiment lexicon for supervision. We automatically generate large sentiment lexicons of bigrams and unigrams, from which we induce a set of lexicons for a variety of sentiment composition processes. The effectiveness of our approach is confirmed through manual annotation, as well as sentiment classification experiments with both phrase-level and sentence-level benchmarks.},
  url       = {http://www.aclweb.org/anthology/C18-1189}
}

@InProceedings{amram-bendavid-tsarfaty:2018:C18-1,
  author    = {Amram, Adam  and  Ben-David, Anat  and  Tsarfaty, Reut},
  title     = {Representations and Architectures in Neural Sentiment Analysis for Morphologically Rich Languages: A Case Study from Modern Hebrew},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2242--2252},
  abstract  = {This paper empirically studies the effects of representation choices on neural sentiment analysis for Modern Hebrew, a morphologically rich language (MRL) for which no sentiment analyzer currently exists. We study two dimensions of representational choices: (i) the granularity of the input signal (token-based vs. morpheme-based), and (ii) the level of encoding of vocabulary items (string-based vs. character-based). We hypothesise that for MRLs, languages where multiple meaning-bearing elements may be carried by a single space-delimited token, these choices will have measurable effects on task performance, and that these effects may vary for different architectural designs --- fully-connected, convolutional or recurrent. Specifically, we hypothesize that morpheme-based representations will have advantages in terms of their generalization capacity and task accuracy, due to their better OOV coverage. To empirically study these effects, we develop a new sentiment analysis benchmark for Hebrew, based on 12K social media comments, and provide two instances of these data: in token-based and morpheme-based settings. Our experiments show that representation choices empirical effects vary with architecture type. While fully-connected and convolutional networks slightly prefer token-based settings, RNNs benefit from a morpheme-based representation, in accord with the hypothesis that explicit morphological information may help generalize. Our endeavour also delivers the first state-of-the-art broad-coverage sentiment analyzer for Hebrew, with over 89% accuracy, alongside an established benchmark to further study the effects of linguistic representation choices on neural networks' task performance.},
  url       = {http://www.aclweb.org/anthology/C18-1190}
}

@InProceedings{vanson-EtAl:2018:C18-1,
  author    = {van Son, Chantal  and  Morante, Roser  and  Aroyo, Lora  and  Vossen, Piek},
  title     = {Scoring and Classifying Implicit Positive Interpretations: A Challenge of Class Imbalance},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2253--2264},
  abstract  = {This paper reports on a reimplementation of a system on detecting implicit positive meaning from negated statements. In the original regression experiment, different positive interpretations per negation are scored according to their likelihood. We convert the scores to classes and report our results on both the regression and classification tasks. We show that a baseline taking the mean score or most frequent class is hard to beat because of class imbalance in the dataset. Our error analysis indicates that an approach that takes the information structure into account (i.e. which information is new or contrastive) may be promising, which requires looking beyond the syntactic and semantic characteristics of negated statements.},
  url       = {http://www.aclweb.org/anthology/C18-1191}
}

@InProceedings{fan-wang-he:2018:C18-1,
  author    = {Fan, Yan  and  Wang, Chengyu  and  He, Xiaofeng},
  title     = {Exploratory Neural Relation Classification for Domain Knowledge Acquisition},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2265--2276},
  abstract  = {The state-of-the-art methods for relation classification are primarily based on deep neural networks. This kind of supervised learning method suffers from not only limited training data, but also the large number of low-frequency relations in specific domains. In this paper, we propose the task of exploratory relation classification for domain knowledge harvesting. The goal is to learn a classifier on pre-defined relations and discover new relations expressed in texts. A dynamically structured neural network is introduced to classify entity pairs to a continuously expanded relation set. We further propose the similarity sensitive Chinese restaurant process to discover new relations. Experiments conducted on a large corpus show the effectiveness of our neural network, while new relations are discovered with high precision and recall.},
  url       = {http://www.aclweb.org/anthology/C18-1192}
}

@InProceedings{nguyen-nguyen:2018:C18-1,
  author    = {Nguyen, Minh  and  Nguyen, Thien},
  title     = {Who is Killed by Police: Introducing Supervised Attention for Hierarchical LSTMs},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2277--2287},
  abstract  = {Finding names of people killed by police has become increasingly important as police shootings get more and more public attention (police killing detection). Unfortunately, there has been not much work in the literature addressing this problem. The early work in this field (Keith et al., 2017) proposed a distant supervision framework based on Expectation Maximization (EM) to deal with the multiple appearances of the names in documents. However, such EM-based framework cannot take full advantages of deep learning models, necessitating the use of hand-designed features to improve the detection performance. In this work, we present a novel deep learning method to solve the problem of police killing recognition. The proposed method relies on hierarchical LSTMs to model the multiple sentences that contain the person names of interests, and introduce supervised attention mechanisms based on semantical word lists and dependency},
  url       = {http://www.aclweb.org/anthology/C18-1193}
}

@InProceedings{saha-:2018:C18-1,
  author    = {Saha, Swarnadeep  and  {Mausam}},
  title     = {Open Information Extraction from Conjunctive Sentences},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2288--2299},
  abstract  = {We develop CALM, a coordination analyzer that improves upon the conjuncts identified from dependency parses. It uses a language model based scoring and several linguistic constraints to search over hierarchical conjunct boundaries (for nested coordination). By splitting a conjunctive sentence around these conjuncts, CALM outputs several simple sentences. We demonstrate the value of our coordination analyzer in the end task of Open Information Extraction (Open IE).},
  url       = {http://www.aclweb.org/anthology/C18-1194}
}

@InProceedings{cetto-EtAl:2018:C18-1,
  author    = {Cetto, Matthias  and  Niklaus, Christina  and  Freitas, André  and  Handschuh, Siegfried},
  title     = {Graphene: Semantically-Linked Propositions in Open Information Extraction},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2300--2311},
  abstract  = {We present an Open Information Extraction (IE) approach that uses a two-layered transformation stage consisting of a clausal disembedding layer and a phrasal disembedding layer, together with rhetorical relation identification. In that way, we convert sentences that present a complex linguistic structure into simplified, syntactically sound sentences, from which we can extract propositions that are represented in a two-layered hierarchy in the form of core relational tuples and accompanying contextual information which are semantically linked via rhetorical relations. In a comparative evaluation, we demonstrate that our reference implementation Graphene outperforms state-of-the-art Open IE systems in the construction of correct n-ary predicate-argument structures. Moreover, we show that existing Open IE approaches can benefit from the transformation process of our framework.},
  url       = {http://www.aclweb.org/anthology/C18-1195}
}

@InProceedings{nagesh-surdeanu:2018:C18-1,
  author    = {Nagesh, Ajay  and  Surdeanu, Mihai},
  title     = {An Exploration of Three Lightly-supervised Representation Learning Approaches for Named Entity Classification},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2312--2324},
  abstract  = {Several semi-supervised representation learning methods have been proposed recently that mitigate the drawbacks of traditional bootstrapping: they reduce the amount of semantic drift introduced by iterative approaches through one-shot learning; others address the sparsity of data through the learning of custom, dense representation for the information modeled. In this work, we are the first to adapt three of these methods, most of which have been originally proposed for image processing, to an information extraction task, specifically, named entity classification. Further, we perform a rigorous comparative analysis on two distinct datasets. Our analysis yields several important observations. First, all representation learning methods outperform state-of-the-art semi-supervised methods that do not rely on representation learning. To the best of our knowledge, we report the latest state-of-the-art results on the semi-supervised named entity classification task. Second, one-shot learning methods clearly outperform iterative representation learning approaches. Lastly, one of the best performers relies on the mean teacher framework (Tarvainen and Valpola, 2017), a simple teacher/student approach that is independent of the underlying task-specific model.},
  url       = {http://www.aclweb.org/anthology/C18-1196}
}

@InProceedings{beinborn-botschen-gurevych:2018:C18-1,
  author    = {Beinborn, Lisa  and  Botschen, Teresa  and  Gurevych, Iryna},
  title     = {Multimodal Grounding for Language Processing},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2325--2339},
  abstract  = {This survey discusses how recent developments in multimodal processing facilitate conceptual grounding of language. We categorize the information flow in multimodal processing with respect to cognitive models of human information processing and analyze different methods for combining multimodal representations. Based on this methodological inventory, we discuss the benefit of multimodal grounding for a variety of language processing tasks and the challenges that arise. We particularly focus on multimodal grounding of verbs which play a crucial role for the compositional power of language.},
  url       = {http://www.aclweb.org/anthology/C18-1197}
}

@InProceedings{naik-EtAl:2018:C18-1,
  author    = {Naik, Aakanksha  and  Ravichander, Abhilasha  and  Sadeh, Norman  and  Rose, Carolyn  and  Neubig, Graham},
  title     = {Stress Test Evaluation for Natural Language Inference},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2340--2353},
  abstract  = {Natural language inference (NLI) is the task of determining if a natural language hypothesis can be inferred from a given premise in a justifiable manner. NLI was proposed as a benchmark task for natural language understanding. Existing models perform well at standard datasets for NLI, achieving impressive results across different genres of text. However, the extent to which these models understand the semantic content of sentences is unclear. In this work, we propose an evaluation methodology consisting of automatically constructed "stress tests" that allow us to examine whether systems have the ability to make real inferential decisions. Our evaluation of six sentence-encoder models on these stress tests reveals strengths and weaknesses of these models with respect to challenging linguistic phenomena, and suggests important directions for future work in this area.},
  url       = {http://www.aclweb.org/anthology/C18-1198}
}

@InProceedings{vu-EtAl:2018:C18-1,
  author    = {Vu, Hoa  and  Greco, Claudio  and  Erofeeva, Aliia  and  Jafaritazehjan, Somayeh  and  Linders, Guido  and  Tanti, Marc  and  Testoni, Alberto  and  Bernardi, Raffaella  and  Gatt, Albert},
  title     = {Grounded Textual Entailment},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2354--2368},
  abstract  = {Capturing semantic relations between sentences, such as entailment, is a long-standing challenge for computational semantics. Logic-based models analyse entailment in terms of possible worlds (interpretations, or situations) where a premise P entails a hypothesis H iff in all worlds where P is true, H is also true. Statistical models view this relationship probabilistically, addressing it in terms of whether a human would likely infer H from P. In this paper, we wish to bridge these two perspectives, by arguing for a visually-grounded version of the Textual Entailment task. Specifically, we ask whether models can perform better if, in addition to P and H, there is also an image (corresponding to the relevant "world" or "situation"). We use a multimodal version of the SNLI dataset (Bowman et al., 2015) and we compare "blind" and visually-augmented models of textual entailment. We show that visual information is beneficial, but we also conduct an in-depth error analysis that reveals that current multimodal models are not performing "grounding" in an optimal fashion.},
  url       = {http://www.aclweb.org/anthology/C18-1199}
}

@InProceedings{yin-yaghoobzadeh-schtze:2018:C18-1,
  author    = {Yin, Wenpeng  and  Yaghoobzadeh, Yadollah  and  Schütze, Hinrich},
  title     = {Recurrent One-Hop Predictions for Reasoning over Knowledge Graphs},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2369--2378},
  abstract  = {Large scale knowledge graphs (KGs) such as Freebase are generally incomplete. Reasoning over multi-hop (mh) KG paths is thus an important capability that is needed for question answering or other NLP tasks that require knowledge about the world. mh-KG reasoning includes diverse scenarios, e.g., given a head entity and a relation path, predict the tail entity; or given two entities connected by some relation paths, predict the unknown relation between them. We present ROPs, recurrent one-hop predictors, that predict entities at each step of mh-KB paths by using recurrent neural networks and vector representations of entities and relations, with two benefits: (i) modeling mh-paths of arbitrary lengths while updating the entity and relation representations by the training signal at each step; (ii) handling different types of mh-KG reasoning in a unified framework. Our models show state-of-the-art for two important multi-hop KG reasoning tasks: Knowledge Base Completion and Path Query Answering.},
  url       = {http://www.aclweb.org/anthology/C18-1200}
}

@InProceedings{gu-EtAl:2018:C18-12,
  author    = {Gu, Yue  and  Yang, Kangning  and  Fu, Shiyu  and  Chen, Shuhong  and  Li, Xinyu  and  Marsic, Ivan},
  title     = {Hybrid Attention based Multimodal Network for Spoken Language Classification},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2379--2390},
  abstract  = {We examine the utility of linguistic content and vocal characteristics for multimodal deep learning in human spoken language understanding. We present a deep multimodal network with both feature attention and modality attention to classify utterance-level speech data. The proposed hybrid attention architecture helps the system focus on learning informative representations for both modality-specific feature extraction and model fusion. The experimental results show that our system achieves state-of-the-art or competitive results on three published multimodal datasets. We also demonstrated the effectiveness and generalization of our system on a medical speech dataset from an actual trauma scenario. Furthermore, we provided a detailed comparison and analysis of traditional approaches and deep learning methods on both feature extraction and fusion.},
  url       = {http://www.aclweb.org/anthology/C18-1201}
}

@InProceedings{nagata-sato-takamura:2018:C18-1,
  author    = {Nagata, Ryo  and  Sato, Taisei  and  Takamura, Hiroya},
  title     = {Exploring the Influence of Spelling Errors on Lexical Variation Measures},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2391--2398},
  abstract  = {This paper explores the influence of spelling errors on lexical variation},
  url       = {http://www.aclweb.org/anthology/C18-1202}
}

@InProceedings{sun-EtAl:2018:C18-1,
  author    = {Sun, Qingying  and  Wang, Zhongqing  and  Zhu, Qiaoming  and  Zhou, Guodong},
  title     = {Stance Detection with Hierarchical Attention Network},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2399--2409},
  abstract  = {Stance detection aims to assign a stance label (for or against) to a post toward a specific target. Recently, there is a growing interest in using neural models to detect stance of documents. Most of these works model the sequence of words to learn document representation. However, much linguistic information, such as polarity and arguments of the document, is correlated with the stance of the document, and can inspire us to explore the stance. Hence, we present a neural model to fully employ various linguistic information to construct the document representation. In addition, since the influences of different linguistic information are different, we propose a hierarchical attention network to weigh the importance of various linguistic information, and learn the mutual attention between the document and the linguistic information. The experimental results on two datasets demonstrate the effectiveness of the proposed hierarchical attention neural model.},
  url       = {http://www.aclweb.org/anthology/C18-1203}
}

@InProceedings{shiue-huang-chen:2018:C18-1,
  author    = {Shiue, Yow-Ting  and  Huang, Hen-Hsen  and  Chen, Hsin-Hsi},
  title     = {Correcting Chinese Word Usage Errors for Learning Chinese as a Second Language},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2410--2422},
  abstract  = {With more and more people around the world learning Chinese as a second language, the need of Chinese error correction tools is increasing. In the HSK dynamic composition corpus, word usage error (WUE) is the most common error type. In this paper, we build a neural network model that considers both target erroneous token and context to generate a correction vector and compare it against a candidate vocabulary to propose suitable corrections. To deal with potential alternative corrections, the top five proposed candidates are judged by native Chinese speakers. For more than 91% of the cases, our system can propose at least one acceptable correction within a list of five candidates. To the best of our knowledge, this is the first research addressing general-type Chinese WUE correction. Our system can help non-native Chinese learners revise their sentences by themselves.},
  url       = {http://www.aclweb.org/anthology/C18-1204}
}

@InProceedings{lengerich-maas-potts:2018:C18-1,
  author    = {Lengerich, Ben  and  Maas, Andrew  and  Potts, Christopher},
  title     = {Retrofitting Distributional Embeddings to Knowledge Graphs with Functional Relations},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2423--2436},
  abstract  = {Knowledge graphs are a versatile framework to encode richly structured data relationships, but it can be challenging to combine these graphs with unstructured data. Methods for retrofitting pre-trained entity representations to the structure of a knowledge graph typically assume that entities are embedded in a connected space and that relations imply similarity. However, useful knowledge graphs often contain diverse entities and relations (with potentially disjoint underlying corpora) which do not accord with these assumptions. To overcome these limitations, we present Functional Retrofitting, a framework that generalizes current retrofitting methods by explicitly modeling pairwise relations. Our framework can directly incorporate a variety of pairwise penalty functions previously developed for knowledge graph completion. Further, it allows users to encode, learn, and extract information about relation semantics. We present both linear and neural instantiations of the framework. Functional Retrofitting significantly outperforms existing retrofitting methods on complex knowledge graphs and loses no accuracy on simpler graphs (in which relations do imply similarity). Finally, we demonstrate the utility of the framework by predicting new drug--disease treatment pairs in a large, complex health knowledge graph.},
  url       = {http://www.aclweb.org/anthology/C18-1205}
}

@InProceedings{zhang-EtAl:2018:C18-13,
  author    = {Zhang, Weinan  and  Cui, Yiming  and  Wang, Yifa  and  Zhu, Qingfu  and  Li, Lingzhi  and  Zhou, Lianqiang  and  Liu, Ting},
  title     = {Context-Sensitive Generation of Open-Domain Conversational Responses},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2437--2447},
  abstract  = {Despite the success of existing works on single-turn conversation generation, taking the coherence in consideration, human conversing is actually a context-sensitive process. Inspired by the existing studies, this paper proposed the static and dynamic attention based approaches for context-sensitive generation of open-domain conversational responses. Experimental results on two public datasets show that the proposed static attention based approach outperforms all the baselines on automatic and human evaluation.},
  url       = {http://www.aclweb.org/anthology/C18-1206}
}

@InProceedings{liu-EtAl:2018:C18-13,
  author    = {Liu, Rui  and  Bao, Feilong  and  Gao, Guanglai  and  Zhang, Hui  and  Wang, Yonghe},
  title     = {A LSTM Approach with Sub-Word Embeddings for Mongolian Phrase Break Prediction},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2448--2455},
  abstract  = {In this paper, we first utilize the word embedding that focuses on sub-word units to the Mongolian Phrase Break (PB) prediction task by using Long-Short-Term-Memory (LSTM) model. Mongolian is an agglutinative language. Each root can be followed by several suffixes to form probably millions of words, but the existing Mongolian corpus is not enough to build a robust entire word embedding, thus it suffers a serious data sparse problem and brings a great difficulty for Mongolian PB prediction. To solve this problem, we look at sub-word units in Mongolian word,},
  url       = {http://www.aclweb.org/anthology/C18-1207}
}

@InProceedings{uresova-EtAl:2018:C18-1,
  author    = {Uresova, Zdenka  and  Fucikova, Eva  and  Hajicova, Eva  and  Hajic, Jan},
  title     = {Synonymy in Bilingual Context: The CzEngClass Lexicon},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2456--2469},
  abstract  = {This paper describes CzEngClass, a bilingual lexical resource being built to investigate verbal synonymy in bilingual context and to relate semantic roles common to one synonym class to verb arguments (verb valency). In addition, the resource is linked to existing resources with the same or a similar aim: English and Czech WordNet, FrameNet, PropBank, VerbNet (SemLink), and valency lexicons for Czech and English (PDT-Vallex, Vallex, and EngVallex). There are several goals of this work and resource: (a) to provide gold standard data for automatic experiments in the future (such as automatic discovery of synonym classes, word sense disambiguation, assignment of classes to occurrences of verbs in text, coreferential linking of verb and event arguments in text, etc.), (b) to build a core (bilingual) lexicon linked to existing resources, for comparative studies and possibly for training automatic tools, and (c) to enrich the annotation of a parallel treebank, the Prague Czech English Dependency Treebank, which so far contained valency annotation but has not linked synonymous senses of verbs together. The method used for extracting the synonym classes is a semi-automatic process with a substantial amount of manual work during filtering, role assignment to classes and individual Class members’ arguments, and linking to the external lexical resources. We present the first version with 200 classes (about 1800 verbs) and evaluate interannotator agreement using several metrics.},
  url       = {http://www.aclweb.org/anthology/C18-1208}
}

@InProceedings{jiao-wang-feng:2018:C18-1,
  author    = {Jiao, Xiaoqi  and  Wang, Fang  and  Feng, Dan},
  title     = {Convolutional Neural Network for Universal Sentence Embeddings},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2470--2481},
  abstract  = {This paper proposes a simple CNN model for creating general-purpose sentence embeddings that can transfer easily across domains and can also act as effective initialization for downstream tasks. Recently, averaging the embeddings of words in a sentence has proven to be a surprisingly},
  url       = {http://www.aclweb.org/anthology/C18-1209}
}

@InProceedings{matteson-EtAl:2018:C18-1,
  author    = {Matteson, Andrew  and  Lee, Chanhee  and  Kim, Youngbum  and  Lim, Heuiseok},
  title     = {Rich Character-Level Information for Korean Morphological Analysis and Part-of-Speech Tagging},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2482--2492},
  abstract  = {Due to the fact that Korean is a highly agglutinative, character-rich language, previous work on Korean morphological analysis typically employs the use of sub-character features known as graphemes or otherwise utilizes comprehensive prior linguistic knowledge (i.e., a dictionary of known morphological transformation forms, or actions). These models have been created with the assumption that character-level, dictionary-less morphological analysis was intractable due to the number of actions required. We present, in this study, a multi-stage action-based model that can perform morphological transformation and part-of-speech tagging using arbitrary units of input and apply it to the case of character-level Korean morphological analysis. Among models that do not employ prior linguistic knowledge, we achieve state-of-the-art word and sentence-level tagging accuracy with the Sejong Korean corpus using our proposed data-driven Bi-LSTM model.},
  url       = {http://www.aclweb.org/anthology/C18-1210}
}

@InProceedings{hakami-hayashi-bollegala:2018:C18-1,
  author    = {Hakami, Huda  and  Hayashi, Kohei  and  Bollegala, Danushka},
  title     = {Why does {PairDiff} work? - A Mathematical Analysis of Bilinear Relational Compositional Operators for Analogy Detection},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2493--2504},
  abstract  = {Representing the semantic relations that exist between two given words (or entities) is an important first step in a wide-range of NLP applications such as analogical reasoning, knowledge base completion and relational information retrieval. A simple, yet surprisingly accurate method for representing a relation between two words is to compute the vector offset (PairDiff) between their corresponding word embeddings. Despite the empirical success, it remains unclear as to whether PairDiff is the best operator for obtaining a relational representation from word embeddings. We conduct a theoretical analysis of generalised bilinear operators that can be used to measure the l2 relational distance between two word-pairs. We show that, if the word embeddings are standardised and uncorrelated, such an operator will be independent of bilinear terms, and can be simplified to a linear form, where PairDiff is a special case. For numerous word embedding types, we empirically verify the uncorrelation assumption, demonstrating the general applicability of our theoretical result. Moreover, we experimentally discover PairDiff from the bilinear relational compositional operator on several benchmark analogy datasets.},
  url       = {http://www.aclweb.org/anthology/C18-1211}
}

@InProceedings{wang-goutte:2018:C18-1,
  author    = {Wang, Yunli  and  Goutte, Cyril},
  title     = {Real-time Change Point Detection using On-line Topic Models},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2505--2515},
  abstract  = {Detecting changes within an unfolding event in real time from news articles or social media enables to react promptly to serious issues in public safety, public health or natural disasters. In this study, we use on-line Latent Dirichlet Allocation (LDA) to model shifts in topics, and apply on-line change point detection (CPD) algorithms to detect when significant changes happen. We describe an on-line Bayesian change point detection algorithm that we use to detect topic changes from on-line LDA output. Extensive experiments on social media data and news articles show the benefits of on-line LDA versus standard LDA, and of on-line change point detection compared to off-line algorithms. This yields F-scores up to 52\% on the detection of significant real-life changes from these document streams.},
  url       = {http://www.aclweb.org/anthology/C18-1212}
}

@InProceedings{schulder-wiegand-ruppenhofer:2018:C18-1,
  author    = {Schulder, Marc  and  Wiegand, Michael  and  Ruppenhofer, Josef},
  title     = {Automatically Creating a Lexicon of Verbal Polarity Shifters: Mono- and Cross-lingual Methods for {German}},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2516--2528},
  abstract  = {In this paper we use methods for creating a large lexicon of verbal polarity shifters and apply them to German. Polarity shifters are content words that can move the polarity of a phrase towards its opposite, such as the verb ``abandon'' in ``abandon all hope''. This is similar to how negation words like ``not'' can influence polarity. Both shifters and negation are required for high precision sentiment analysis. Lists of negation words are available for many languages, but the only language for which a sizable lexicon of verbal polarity shifters exists is English. This lexicon was created by bootstrapping a sample of annotated verbs with a supervised classifier that uses a set of data- and resource-driven features. We reproduce and adapt this approach to create a German lexicon of verbal polarity shifters. Thereby, we confirm that the approach works for multiple languages. We further improve classification by leveraging cross-lingual information from the English shifter lexicon. Using this improved approach, we bootstrap a large number of German verbal polarity shifters, reducing the annotation effort drastically. The resulting German lexicon of verbal polarity shifters is made publicly available.},
  url       = {http://www.aclweb.org/anthology/C18-1213}
}

@InProceedings{anastasopoulos-EtAl:2018:C18-1,
  author    = {Anastasopoulos, Antonios  and  Lekakou, Marika  and  Quer, Josep  and  Zimianiti, Eleni  and  DeBenedetto, Justin  and  Chiang, David},
  title     = {Part-of-Speech Tagging on an Endangered Language: a Parallel {Griko-Italian} Resource},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2529--2539},
  abstract  = {Most work on part-of-speech (POS) tagging is focused on high resource languages, or examines low-resource and active learning settings through simulated studies. We evaluate POS tagging techniques on an actual endangered language, Griko.},
  url       = {http://www.aclweb.org/anthology/C18-1214}
}

@InProceedings{wang-EtAl:2018:C18-17,
  author    = {Wang, Lu  and  Li, Shoushan  and  Sun, Changlong  and  Si, Luo  and  Liu, Xiaozhong  and  Zhang, Min  and  Zhou, Guodong},
  title     = {One vs. Many {QA} Matching with both Word-level and Sentence-level Attention Network},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2540--2550},
  abstract  = {Question-Answer (QA) matching is a fundamental task in the Natural Language Processing community. In this paper, we first build a novel QA matching corpus with informal text which is collected from a product reviewing website. Then, we propose a novel QA matching approach, namely One vs. Many Matching, which aims to address the novel scenario where one question sentence often has an answer with multiple sentences. Furthermore, we improve our matching approach by employing both word-level and sentence-level attentions for solving the noisy problem in the informal text. Empirical studies demonstrate the effectiveness of the proposed approach to question-answer matching.},
  url       = {http://www.aclweb.org/anthology/C18-1215}
}

@InProceedings{kim-EtAl:2018:C18-11,
  author    = {Kim, Yeachan  and  Kim, Kang-Min  and  Lee, Ji-Min  and  Lee, SangKeun},
  title     = {Learning to Generate Word Representations using Subword Information},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2551--2561},
  abstract  = {Distributed representations of words play a major role in the field of natural language processing by encoding semantic and syntactic information of words. However, most existing works on learning word representations typically regard words as individual atomic units and thus are blind to subword information in words. This further gives rise to a difficulty in representing out-of-vocabulary (OOV) words. In this paper, we present a character-based word representation approach to deal with this limitation. The proposed model learns to generate word representations from characters. In our model, we employ a convolutional neural network and a highway network over characters to extract salient features effectively. Unlike previous models that learn word representations from a large corpus, we take a set of pre-trained word embeddings and generalize it to word entries, including OOV words. To demonstrate the efficacy of the proposed model, we perform both an intrinsic and an extrinsic task which are word similarity and language modeling, respectively. Experimental results show clearly that the proposed model significantly outperforms strong baseline models that regard words or their subwords as atomic units. For example, we achieve as much as 18.5\% improvement on average in perplexity for morphologically rich languages compared to strong baselines in the language modeling task.},
  url       = {http://www.aclweb.org/anthology/C18-1216}
}

@InProceedings{binzia-raza-athar:2018:C18-1,
  author    = {Bin Zia, Haris  and  Raza, Agha Ali  and  Athar, Awais},
  title     = {{Urdu} Word Segmentation using Conditional Random Fields ({CRFs})},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2562--2569},
  abstract  = {State-of-the-art Natural Language Processing algorithms rely heavily on efficient word segmentation. Urdu is amongst languages for which word segmentation is a complex task as it exhibits space omission as well as space insertion issues. This is partly due to the Arabic script which although cursive in nature, consists of characters that have inherent joining and non-joining attributes regardless of word boundary. This paper presents a word segmentation system for Urdu which uses a Conditional Random Field sequence modeler with orthographic, linguistic and morphological features. Our proposed model automatically learns to predict white space as word boundary as well as Zero Width Non-Joiner (ZWNJ) as sub-word boundary. Using a manually annotated corpus, our model achieves F1 score of 0.97 for word boundary identification and 0.85 for sub-word boundary identification tasks. We have made our code and corpus publicly available to make our results reproducible.},
  url       = {http://www.aclweb.org/anthology/C18-1217}
}

@InProceedings{billami-franois-gala:2018:C18-1,
  author    = {Billami, Mokhtar Boumedyen  and  Fran{\c{c}}ois, Thomas  and  Gala, Nuria},
  title     = {{ReSyf}: a {French} lexicon with ranked synonyms},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2570--2581},
  abstract  = {In this article, we present ReSyf, a lexical resource of monolingual synonyms ranked according to their difficulty to be read and understood by native learners of French. The synonyms come from an existing lexical network and they have been semantically disambiguated and refined. A ranking algorithm, based on a wide range of linguistic features and validated through an evaluation campaign with human annotators, automatically sorts the synonyms corresponding to a given word sense by reading difficulty. ReSyf is freely available and will be integrated into a web platform for reading assistance. It can also be applied to perform lexical simplification of French texts.},
  url       = {http://www.aclweb.org/anthology/C18-1218}
}

@InProceedings{pasquer-EtAl:2018:C18-1,
  author    = {Pasquer, Caroline  and  Savary, Agata  and  Ramisch, Carlos  and  Antoine, Jean-Yves},
  title     = {If you've seen some, you've seen them all: Identifying variants of multiword expressions},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2582--2594},
  abstract  = {Multiword expressions, especially verbal ones (VMWEs), show idiosyncratic variability, which is challenging for NLP applications, hence the need for VMWE identification. We focus on the task of variant identification, i.e. identifying variants of previously seen VMWEs, whatever their surface form.},
  url       = {http://www.aclweb.org/anthology/C18-1219}
}

@inproceedings{hao-paul:2018:C18-1,
  author    = {Hao, Shudong and Paul, Michael J.},
  title     = {Learning Multilingual Topics from Incomparable Corpora},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  year      = {2018},
  month     = {August},
  pages     = {2595--2609},
  publisher = {Association for Computational Linguistics},
  address   = {Santa Fe, New Mexico, USA},
  url       = {http://www.aclweb.org/anthology/C18-1220},
  abstract  = {Multilingual topic models enable crosslingual tasks by extracting consistent topics from multilingual corpora. Most models require parallel or comparable training corpora, which limits their ability to generalize. In this paper, we first demystify the knowledge transfer mechanism behind multilingual topic models by defining an alternative but equivalent formulation. Based on this analysis, we then relax the assumption of training data required by most existing models, creating a model that only requires a dictionary for training. Experiments show that our new method effectively learns coherent multilingual topics from partially and fully incomparable corpora with limited amounts of dictionary resources.}
}

@inproceedings{charbonnier-wartena:2018:C18-1,
  author    = {Charbonnier, Jean and Wartena, Christian},
  title     = {Using Word Embeddings for Unsupervised Acronym Disambiguation},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  year      = {2018},
  month     = {August},
  pages     = {2610--2619},
  publisher = {Association for Computational Linguistics},
  address   = {Santa Fe, New Mexico, USA},
  url       = {http://www.aclweb.org/anthology/C18-1221},
  abstract  = {Scientific papers from all disciplines contain many abbreviations and acronyms. In many}
}

@InProceedings{littell-EtAl:2018:C18-1,
  author    = {Littell, Patrick  and  Kazantseva, Anna  and  Kuhn, Roland  and  Pine, Aidan  and  Arppe, Antti  and  Cox, Christopher  and  Junker, Marie-Odile},
  title     = {{Indigenous} language technologies in {Canada}: Assessment, challenges, and successes},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2620--2632},
  abstract  = {In this article, we discuss which text, speech, and image technologies have been developed, and would be feasible to develop, for the approximately 60 Indigenous languages spoken in Canada. In particular, we concentrate on technologies that may be feasible to develop for most or all of these languages, not just those that may be feasible for the few most-resourced of these. We assess past achievements and consider future horizons for Indigenous language transliteration, text prediction, spell-checking, approximate search, machine translation, speech recognition, speaker diarization, speech synthesis, optical character recognition, and computer-aided language learning.},
  url       = {http://www.aclweb.org/anthology/C18-1222}
}

@InProceedings{byamugisha-keet-derenzi:2018:C18-1,
  author    = {Byamugisha, Joan  and  Keet, C. Maria  and  DeRenzi, Brian},
  title     = {Pluralizing Nouns across Agglutinating {Bantu} Languages},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2633--2643},
  abstract  = {Text generation may require the pluralization of nouns, such as in context-sensitive user interfaces and in natural language generation more broadly. While this has been solved for the widely-used languages, this is still a challenge for the languages in the Bantu language family. Pluralization results obtained for isiZulu and Runyankore showed there were similarities in approach, including the need to combine syntax with semantics, despite belonging to different language zones. This suggests that bootstrapping and generalizability might be feasible. We investigated this systematically for seven languages across three different Guthrie language zones. The first outcome is that Meinhof's 1948 specification of the noun classes are indeed inadequate for computational purposes for all examined languages, due to non-determinism in prefixes, and we thus redefined the characteristic noun class tables of 29 noun classes into 53. The second main result is that the generic pluralizer achieved over 93\% accuracy in coverage testing and over 94\% on a random sample. This is comparable to the language-specific isiZulu and Runyankore pluralizers.},
  url       = {http://www.aclweb.org/anthology/C18-1223}
}

@InProceedings{kazeminejad-EtAl:2018:C18-1,
  author    = {Kazeminejad, Ghazaleh  and  Bonial, Claire  and  Brown, Susan Windisch  and  Palmer, Martha},
  title     = {Automatically Extracting Qualia Relations for the {Rich Event Ontology}},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2644--2652},
  abstract  = {Commonsense, real-world knowledge about the events that entities or ``things in the world'' are typically involved in, as well as part-whole relationships, is valuable for allowing computational systems to draw everyday inferences about the world. Here, we focus on automatically extracting information about (1) the events that typically bring about certain entities (origins), (2) the events that are the typical functions of entities, and (3) part-whole relationships in entities. These correspond to the agentive, telic and constitutive qualia central to the Generative Lexicon. We describe our motivations and methods for extracting these qualia relations from the Suggested Upper Merged Ontology (SUMO) and show that human annotators overwhelmingly find the information extracted to be reasonable. Because ontologies provide a way of structuring this information and making it accessible to agents and computational systems generally, efforts are underway to incorporate the extracted information to an ontology hub of Natural Language Processing semantic role labeling resources, the Rich Event Ontology.},
  url       = {http://www.aclweb.org/anthology/C18-1224}
}

@InProceedings{espinosaanke-schockaert:2018:C18-1,
  author    = {Espinosa Anke, Luis  and  Schockaert, Steven},
  title     = {{SeVeN}: Augmenting Word Embeddings with Unsupervised Relation Vectors},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2653--2665},
  abstract  = {We present SeVeN (Semantic Vector Networks), a hybrid resource that encodes relationships between words in the form of a graph. Different from traditional semantic networks, these relations are represented as vectors in a continuous vector space. We propose a simple pipeline for learning such relation vectors, which is based on word vector averaging in combination with an ad hoc autoencoder. We show that by explicitly encoding relational information in a dedicated vector space we can capture aspects of word meaning that are complementary to what is captured by word embeddings. For example, by examining clusters of relation vectors, we observe that relational similarities can be identified at a more abstract level than with traditional word vector differences. Finally, we test the effectiveness of semantic vector networks in two tasks: measuring word similarity and neural text categorization. SeVeN is available at bitbucket.org/luisespinosa/seven.},
  url       = {http://www.aclweb.org/anthology/C18-1225}
}

@inproceedings{aldarmaki-diab:2018:C18-1,
  author    = {Aldarmaki, Hanan and Diab, Mona},
  title     = {Evaluation of Unsupervised Compositional Representations},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  year      = {2018},
  month     = {August},
  pages     = {2666--2677},
  publisher = {Association for Computational Linguistics},
  address   = {Santa Fe, New Mexico, USA},
  url       = {http://www.aclweb.org/anthology/C18-1226},
  abstract  = {We evaluated various compositional models, from bag-of-words representations to compositional RNN-based models, on several extrinsic supervised and unsupervised evaluation benchmarks. Our results confirm that weighted vector averaging can outperform context-sensitive models in most benchmarks, but structural features encoded in RNN models can also be useful in certain classification tasks. We analyzed some of the evaluation datasets to identify the aspects of meaning they measure and the characteristics of the various models that explain their performance variance.}
}

@InProceedings{iwatsuki-aizawa:2018:C18-1,
  author    = {Iwatsuki, Kenichi  and  Aizawa, Akiko},
  title     = {Using Formulaic Expressions in Writing Assistance Systems},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2678--2689},
  abstract  = {Formulaic expressions (FEs) used in scholarly papers, such as `there has been little discussion about', are helpful for non-native English speakers. However, it is time-consuming for users to manually search for an appropriate expression every time they want to consult FE dictionaries. For this reason, we tackle the task of semantic searches of FE dictionaries. At the start of our research, we identified two salient difficulties in this task. First, the paucity of example sentences in existing FE dictionaries results in a shortage of context information, which is necessary for acquiring semantic representation of FEs. Second, while a semantic category label is assigned to each FE in many FE dictionaries, it is difficult to predict the labels from user input, forcing users to manually designate the semantic category when searching. To address these difficulties, we propose a new framework for semantic searches of FEs and propose a new method to leverage both existing dictionaries and domain sentence corpora. Further, we expand an existing FE dictionary to consider building a more comprehensive and domain-specific FE dictionary and to verify the effectiveness of our method.},
  url       = {http://www.aclweb.org/anthology/C18-1227}
}

@inproceedings{rogers-hosurananthakrishna-rumshisky:2018:C18-1,
  author    = {Rogers, Anna and Hosur Ananthakrishna, Shashwath and Rumshisky, Anna},
  title     = {What's in Your Embedding, And How It Predicts Task Performance},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  year      = {2018},
  month     = {August},
  pages     = {2690--2703},
  publisher = {Association for Computational Linguistics},
  address   = {Santa Fe, New Mexico, USA},
  url       = {http://www.aclweb.org/anthology/C18-1228},
  abstract  = {Attempts to find a single technique for general-purpose intrinsic evaluation of word embeddings have so far not been successful. We present a new approach based on scaled-up qualitative analysis of word vector neighborhoods that quantifies interpretable characteristics of a given model (e.g. its preference for synonyms or shared morphological forms as nearest neighbors). We analyze 21 such factors and show how they correlate with performance on 14 extrinsic and intrinsic task datasets (and also explain the lack of correlation between some of them). Our approach enables multi-faceted evaluation, parameter search, and generally -- a more principled, hypothesis-driven approach to development of distributional semantic representations.}
}

@inproceedings{o-EtAl:2018:C18-1,
  author    = {O, Dongsuk and Kwon, Sunjae and Kim, Kyungsun and Ko, Youngjoong},
  title     = {Word Sense Disambiguation Based on Word Similarity Calculation Using Word Vector Representation from a Knowledge-based Graph},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  year      = {2018},
  month     = {August},
  pages     = {2704--2714},
  publisher = {Association for Computational Linguistics},
  address   = {Santa Fe, New Mexico, USA},
  url       = {http://www.aclweb.org/anthology/C18-1229},
  abstract  = {Word sense disambiguation (WSD) is the task to determine the word sense according to its context. Many existing WSD studies have been using an external knowledge-based unsupervised approach because it has fewer word set constraints than supervised approaches requiring training data. In this paper, we propose a new WSD method to generate the context of an ambiguous word by using similarities between an ambiguous word and words in the input document. In addition, to leverage our WSD method, we further propose a new word similarity calculation method based on the semantic network structure of BabelNet. We evaluate the proposed methods on the SemEval-13 and SemEval-15 for English WSD dataset. Experimental results demonstrate that the proposed WSD method significantly improves the baseline WSD method. Furthermore, our WSD system outperforms the state-of-the-art WSD systems in the Semeval-13 dataset. Finally, it has higher performance than the state-of-the-art unsupervised knowledge-based WSD system in the average performance of both datasets.}
}

@inproceedings{patro-EtAl:2018:C18-1,
  author    = {Patro, Badri Narayana and Kurmi, Vinod Kumar and Kumar, Sandeep and Namboodiri, Vinay},
  title     = {Learning Semantic Sentence Embeddings using Sequential Pair-wise Discriminator},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  year      = {2018},
  month     = {August},
  pages     = {2715--2729},
  publisher = {Association for Computational Linguistics},
  address   = {Santa Fe, New Mexico, USA},
  url       = {http://www.aclweb.org/anthology/C18-1230},
  abstract  = {In this paper, we propose a method for obtaining sentence-level embeddings. While the problem of securing word-level embeddings is very well studied, we propose a novel method for obtaining sentence-level embeddings. This is obtained by a simple method in the context of solving the paraphrase generation task. If we use a sequential encoder-decoder model for generating paraphrase, we would like the generated paraphrase to be semantically close to the original sentence. One way to ensure this is by adding constraints for true paraphrase embeddings to be close and unrelated paraphrase candidate sentence embeddings to be far. This is ensured by using a sequential pair-wise discriminator that shares weights with the encoder that is trained with a suitable loss function. Our loss function penalizes paraphrase sentence embedding distances from being too large. This loss is used in combination with a sequential encoder-decoder network. We also validated our method by evaluating the obtained embeddings for a sentiment analysis task. The proposed method results in semantic embeddings and outperforms the state-of-the-art on the paraphrase generation and sentiment analysis task on standard datasets. These results are also shown to be statistically significant.}
}

@inproceedings{chollampatt-ng:2018:C18-1,
  author    = {Chollampatt, Shamil and Ng, Hwee Tou},
  title     = {A Reassessment of Reference-Based Grammatical Error Correction Metrics},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  year      = {2018},
  month     = {August},
  pages     = {2730--2741},
  publisher = {Association for Computational Linguistics},
  address   = {Santa Fe, New Mexico, USA},
  url       = {http://www.aclweb.org/anthology/C18-1231},
  abstract  = {Several metrics have been proposed for evaluating grammatical error correction (GEC) systems based on grammaticality, fluency, and adequacy of the output sentences. Previous studies of the correlation of these metrics with human quality judgments were inconclusive, due to the lack of appropriate significance tests, discrepancies in the methods, and choice of datasets used. In this paper, we re-evaluate reference-based GEC metrics by measuring the system-level correlations with humans on a large dataset of human judgments of GEC outputs, and by properly conducting statistical significance tests. Our results show no significant advantage of GLEU over MaxMatch (M2), contradicting previous studies that claim GLEU to be superior. For a finer-grained analysis, we additionally evaluate these metrics for their agreement with human judgments at the sentence level. Our sentence-level analysis indicates that comparing GLEU and M2, one metric may be more useful than the other depending on the scenario. We further qualitatively analyze these metrics and our findings show that apart from being less interpretable and non-deterministic, GLEU also produces counter-intuitive scores in commonly occurring test examples.}
}

@inproceedings{gong-EtAl:2018:C18-1,
  author    = {Gong, Jingjing and Qiu, Xipeng and Wang, Shaojing and Huang, Xuanjing},
  title     = {Information Aggregation via Dynamic Routing for Sequence Encoding},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  year      = {2018},
  month     = {August},
  pages     = {2742--2752},
  publisher = {Association for Computational Linguistics},
  address   = {Santa Fe, New Mexico, USA},
  url       = {http://www.aclweb.org/anthology/C18-1232},
  abstract  = {While much progress has been made in how to encode a text sequence into a sequence of vectors, less attention has been paid to how to aggregate these preceding vectors (outputs of RNN/CNN) into fixed-size encoding vector. Usually, a simple max or average pooling is used, which is a bottom-up and passive way of aggregation and lack of guidance by task information.}
}

@inproceedings{cai-EtAl:2018:C18-1,
  author    = {Cai, Jiaxun and He, Shexia and Li, Zuchao and Zhao, Hai},
  title     = {A Full End-to-End Semantic Role Labeler, Syntactic-agnostic Over Syntactic-aware?},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  year      = {2018},
  month     = {August},
  pages     = {2753--2765},
  publisher = {Association for Computational Linguistics},
  address   = {Santa Fe, New Mexico, USA},
  url       = {http://www.aclweb.org/anthology/C18-1233},
  abstract  = {Semantic role labeling (SRL) is to recognize the predicate-argument structure of a sentence, including subtasks of predicate disambiguation and argument labeling. Previous studies usually formulate the entire SRL problem into two or more subtasks. For the first time, this paper introduces an end-to-end neural model which unifiedly tackles the predicate disambiguation and the argument labeling in one shot. Using a biaffine scorer, our model directly predicts all semantic role labels for all given word pairs in the sentence without relying on any syntactic parse information. Specifically, we augment the BiLSTM encoder with a non-linear transformation to further distinguish the predicate and the argument in a given sentence, and model the semantic role labeling process as a word pair classification task by employing the biaffine attentional mechanism. Though the proposed model is syntax-agnostic with local decoder, it outperforms the state-of-the-art syntax-aware SRL systems on the CoNLL-2008, 2009 benchmarks for both English and Chinese. To our best knowledge, we report the first syntax-agnostic SRL model that surpasses all known syntax-aware models.}
}

@InProceedings{patchala-bhatnagar:2018:C18-1,
  author    = {Patchala, Jagadeesh  and  Bhatnagar, Raj},
  title     = {Authorship Attribution By Consensus Among Multiple Features},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = aug,
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2766--2777},
  abstract  = {Most existing research on authorship attribution uses various lexical, syntactic and semantic features. In this paper we demonstrate an effective template-based approach for combining various syntactic features of a document for authorship analysis. The parse-tree based features that we propose are independent of the topic of a document and reflect the innate writing styles of authors. We show that the use of templates including sub-trees of parse trees in conjunction with other syntactic features result in improved author attribution rates. Another contribution is the demonstration that Dempster’s rule based combination of evidence from syntactic features performs better than other evidence-combination methods. We also demonstrate that our methodology works well for the case where actual author is not included in the candidate author set.},
  url       = {http://www.aclweb.org/anthology/C18-1234}
}

@InProceedings{kim-EtAl:2018:C18-12,
  author    = {Kim, Jun-Seong  and  Kim, Junghoe  and  Park, SeungUn  and  Lee, Kwangyong  and  Lee, Yoonju},
  title     = {Modeling with Recurrent Neural Networks for Open Vocabulary Slots},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = aug,
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2778--2790},
  abstract  = {Dealing with ‘open-vocabulary’ slots has been among the challenges in the natural language area. While recent studies on attention-based recurrent neural network (RNN) models have performed well in completing several language related tasks such as spoken language understanding and dialogue systems, there has been a lack of attempts to address filling slots that take on values from a virtually unlimited set. In this paper, we propose a new RNN model that can capture the vital concept: Understanding the role of a word may vary according to how long a reader focuses on a particular part of a sentence. The proposed model utilizes a long-term aware attention structure, positional encoding primarily considering the relative distance between words, and multi-task learning of a character-based language model and an intent detection model. We show that the model outperforms the existing RNN models with respect to discovering ‘open-vocabulary’ slots without any external information, such as a named entity database or knowledge base. In particular, we confirm that it performs better with a greater number of slots in a dataset, including unknown words, by evaluating the models on a dataset of several domains. In addition, the proposed model also demonstrates superior performance with regard to intent detection.},
  url       = {http://www.aclweb.org/anthology/C18-1235}
}

@InProceedings{vanderaa-EtAl:2018:C18-1,
  author    = {van der Aa, Han  and  Carmona, Josep  and  Leopold, Henrik  and  Mendling, Jan  and  Padr{\'o}, Llu{\'\i}s},
  title     = {Challenges and Opportunities of Applying Natural Language Processing in Business Process Management},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = aug,
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2791--2801},
  abstract  = {The Business Process Management (BPM) field focuses in the coordination of labor so that organizational processes are smoothly executed in a way that products and services are properly delivered. At the same time, NLP has reached a maturity level that enables its widespread application in many contexts, thanks to publicly available frameworks. In this position paper, we show how NLP has potential in raising the benefits of BPM practices at different levels. Instead of being exhaustive, we show selected key challenges were a successful application of NLP techniques would facilitate the automation of particular tasks that nowadays require a significant effort to accomplish. Finally, we report on applications that consider both the process perspective and its enhancement through NLP.},
  url       = {http://www.aclweb.org/anthology/C18-1236}
}

% NOTE(review): the abstract of the next entry stops mid-sentence (it ends with
% "from both source and target") and looks truncated -- restore the full text
% from the published paper.
@InProceedings{ghosal-EtAl:2018:C18-1,
  author    = {Ghosal, Tirthankar  and  Edithal, Vignesh  and  Ekbal, Asif  and  Bhattacharyya, Pushpak  and  Tsatsaronis, George  and  Chivukula, Srinivasa Satya Sameer Kumar},
  title     = {Novelty Goes Deep. A Deep Neural Solution To Document Level Novelty Detection},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = aug,
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2802--2813},
  abstract  = {The rapid growth of documents across the web has necessitated finding means of discarding redundant documents and retaining novel ones. Capturing redundancy is challenging as it may involve investigating at a deep semantic level. Techniques for detecting such semantic redundancy at the document level are scarce. In this work we propose a deep Convolutional Neural Networks (CNN) based model to classify a document as novel or redundant with respect to a set of relevant documents already seen by the system. The system is simple and do not require any manual feature engineering. Our novel scheme encodes relevant and relative information from both source and target},
  url       = {http://www.aclweb.org/anthology/C18-1237}
}

@InProceedings{sundararajan-woodard:2018:C18-1,
  author    = {Sundararajan, Kalaivani  and  Woodard, Damon},
  title     = {What represents ``style'' in authorship attribution?},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = aug,
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2814--2822},
  abstract  = {Authorship attribution typically uses all information representing both content and style whereas attribution based only on stylistic aspects may be robust in cross-domain settings. This paper analyzes different linguistic aspects that may help represent style. Specifically, we study the role of syntax and lexical words (nouns, verbs, adjectives and adverbs) in representing style. We use a purely syntactic language model to study the significance of sentence structures in both single-domain and cross-domain attribution, \textit{i.e.} cross-topic and cross-genre attribution. We show that syntax may be helpful for cross-genre attribution while cross-topic attribution and single-domain may benefit from additional lexical information. Further, pure syntactic models may not be effective by themselves and need to be used in combination with other robust models. To study the role of word choice, we perform attribution by masking all words or specific topic words corresponding to nouns, verbs, adjectives and adverbs. Using a single-domain dataset, IMDB1M reviews, we demonstrate the heavy influence of common nouns and proper nouns in attribution, thereby highlighting topic interference. Using cross-domain Guardian10 dataset, we show that some common nouns, verbs, adjectives and adverbs may help with stylometric attribution as demonstrated by masking topic words corresponding to these parts-of-speech. As expected, it was observed that proper nouns are heavily influenced by content and cross-domain attribution will benefit from completely masking them.},
  url       = {http://www.aclweb.org/anthology/C18-1238}
}

@InProceedings{duan-EtAl:2018:C18-1,
  author    = {Duan, Junwen  and  Zhang, Yue  and  Ding, Xiao  and  Chang, Ching-Yun  and  Liu, Ting},
  title     = {Learning Target-Specific Representations of Financial News Documents For Cumulative Abnormal Return Prediction},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = aug,
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2823--2833},
  abstract  = {Texts from the Internet serve as important data sources for financial market modeling. Early statistical approaches rely on manually defined features to capture lexical, sentiment and event information, which suffers from feature sparsity. Recent work has considered learning dense representations for news titles and abstracts. Compared to news titles, full documents can contain more potentially helpful information, but also noise compared to events and sentences, which has been less investigated in previous work. To fill this gap, we propose a novel target-specific abstract-guided news document representation model. The model uses a target-sensitive representation of the news abstract to weigh sentences in the news content, so as to select and combine the most informative sentences for market modeling. Results show that document representations can give better performance for estimating cumulative abnormal returns of companies when compared to titles and abstracts. Our model is especially effective when it used to combine information from multiple document sources compared to the sentence-level baselines.},
  url       = {http://www.aclweb.org/anthology/C18-1239}
}

@InProceedings{an-han-sun:2018:C18-1,
  author    = {An, Bo  and  Han, Xianpei  and  Sun, Le},
  title     = {Model-Free Context-Aware Word Composition},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = aug,
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2834--2845},
  abstract  = {Word composition is a promising technique for representation learning of large linguistic units (e.g., phrases, sentences and documents). However, most of the current composition models do not take the ambiguity of words and the context outside of a linguistic unit into consideration for learning representations, and consequently suffer from the inaccurate representation of semantics.},
  url       = {http://www.aclweb.org/anthology/C18-1240}
}

@InProceedings{li:2018:C18-1,
  author    = {Li, Yanpeng},
  title     = {Learning Features from Co-occurrences: A Theoretical Analysis},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = aug,
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2846--2854},
  abstract  = {Representing a word by its co-occurrences with other words in context is an effective way to capture the meaning of the word. However, the theory behind remains a challenge. In this work, taking the example of a word classification task, we give a theoretical analysis of the approaches that represent a word X by a function f(P(C|X)), where C is a context feature, P(C|X) is the conditional probability estimated from a text corpus, and the function f maps the co-occurrence measure to a prediction score. We investigate the impact of context feature C and the function f . We also explain the reasons why using the co-occurrences with multiple context features may be better than just using a single one. In addition, based on the analysis, we propose a hypothesis about the conditional probability on zero probability events.},
  url       = {http://www.aclweb.org/anthology/C18-1241}
}

@InProceedings{liu-morin-saldarriaga:2018:C18-1,
  author    = {Liu, Jingshu  and  Morin, Emmanuel  and  Pe{\~n}a Saldarriaga, Sebasti{\'a}n},
  title     = {Towards a unified framework for bilingual terminology extraction of single-word and multi-word terms},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = aug,
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2855--2866},
  abstract  = {Extracting a bilingual terminology for multi-word terms from comparable corpora has not been widely researched. In this work we propose a unified framework for aligning bilingual terms independently of the term lengths. We also introduce some enhancements to the context-based and the neural network based approaches. Our experiments show the effectiveness of our enhancements of previous works and the system can be adapted in specialized domains.},
  url       = {http://www.aclweb.org/anthology/C18-1242}
}

@InProceedings{athanasiou-iosif-potamianos:2018:C18-1,
  author    = {Athanasiou, Nikos  and  Iosif, Elias  and  Potamianos, Alexandros},
  title     = {Neural Activation Semantic Models: Computational lexical semantic models of localized neural activations},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = aug,
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2867--2878},
  abstract  = {Neural activation models have been proposed in the literature that use a set of example words for which fMRI measurements are available in order to find a mapping between word semantics and localized neural activations. Successful mappings let us expand to the full lexicon of concrete nouns using the assumption that similarity of meaning implies similar neural activation patterns. In this paper, we propose a computational model that estimates semantic similarity in the neural activation space and investigates the relative performance of this model for various natural language processing tasks. Despite the simplicity of the proposed model and the very small number of example words used to bootstrap it, the neural activation semantic model performs surprisingly well compared to state-of-the-art word embeddings. Specifically, the neural activation semantic model performs better than the state-of-the-art for the task of semantic similarity estimation between very similar or very dissimilar words, while performing well on other tasks such as entailment and word categorization. These are strong indications that neural activation semantic models can not only shed some light into human cognition but also contribute to computation models for certain tasks.},
  url       = {http://www.aclweb.org/anthology/C18-1243}
}

@InProceedings{kar-maharjan-solorio:2018:C18-1,
  author    = {Kar, Sudipta  and  Maharjan, Suraj  and  Solorio, Thamar},
  title     = {Folksonomication: Predicting Tags for Movies from Plot Synopses using Emotion Flow Encoded Neural Network},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = aug,
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2879--2891},
  abstract  = {Folksonomy of movies covers a wide range of heterogeneous information about movies, like the genre, plot structure, visual experiences, soundtracks, metadata, and emotional experiences from watching a movie. Being able to automatically generate or predict tags for movies can help recommendation engines improve retrieval of similar movies, and help viewers know what to expect from a movie in advance. In this work, we explore the problem of creating tags for movies from plot synopses. We propose a novel neural network model that merges information from synopses and emotion flows throughout the plots to predict a set of tags for movies. We compare our system with multiple baselines and found that the addition of emotion flows boosts the performance of the network by learning ≈18\% more tags than a traditional machine learning system.},
  url       = {http://www.aclweb.org/anthology/C18-1244}
}

@InProceedings{buechel-hahn:2018:C18-1,
  author    = {Buechel, Sven  and  Hahn, Udo},
  title     = {Emotion Representation Mapping for Automatic Lexicon Construction (Mostly) Performs on Human Level},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = aug,
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2892--2904},
  abstract  = {Emotion Representation Mapping (ERM) has the goal to convert existing emotion ratings from one representation format into another one, e.g., mapping Valence-Arousal-Dominance annotations for words or sentences into Ekman's Basic Emotions and vice versa. ERM can thus not only be considered as an alternative to Word Emotion Induction (WEI) techniques for automatic emotion lexicon construction but may also help mitigate problems that come from the proliferation of emotion representation formats in recent years. We propose a new neural network approach to ERM that not only outperforms the previous state-of-the-art. Equally important, we present a refined evaluation methodology and gather strong evidence that our model yields results which are (almost) as reliable as human annotations, even in cross-lingual settings. Based on these results we generate new emotion ratings for 13 typologically diverse languages and claim that they have near-gold quality, at least.},
  url       = {http://www.aclweb.org/anthology/C18-1245}
}

@InProceedings{tafreshi-diab:2018:C18-1,
  author    = {Tafreshi, Shabnam  and  Diab, Mona},
  title     = {Emotion Detection and Classification in a Multigenre Corpus with Joint Multi-Task Deep Learning},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = aug,
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2905--2913},
  abstract  = {Detection and classification of emotion categories expressed by a sentence is a challenging task due to subjectivity of emotion. To date, most of the models are trained and evaluated on single genre and when used to predict emotion in different genre their performance drops by a large margin. To address the issue of robustness, we model the problem within a joint multi-task learning framework. We train this model with a multigenre emotion corpus to predict emotions across various genre. Each genre is represented as a separate task, we use soft parameter shared layers across the various tasks. our experimental results show that this model improves the results across the various genres, compared to a single genre training in the same neural net architecture.},
  url       = {http://www.aclweb.org/anthology/C18-1246}
}

@InProceedings{kulshreshtha-goel-kumarsingh:2018:C18-1,
  author    = {Kulshreshtha, Devang  and  Goel, Pranav  and  Singh, Anil Kumar},
  title     = {How emotional are you? Neural Architectures for Emotion Intensity Prediction in Microblogs},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = aug,
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2914--2926},
  abstract  = {Social media based micro-blogging sites like Twitter have become a common source of real-time information (impacting organizations and their strategies, and are used for expressing emotions and opinions. Automated analysis of such content therefore rises in importance. To this end, we explore the viability of using deep neural networks on the specific task of emotion intensity prediction in tweets. We propose a neural architecture combining convolutional and fully connected layers in a non-sequential manner - done for the first time in context of natural language based tasks. Combined with lexicon-based features along with transfer learning, our model achieves state-of-the-art performance, outperforming the previous system by 0.044 or 4.4\% Pearson correlation on the WASSA'17 EmoInt shared task dataset. We investigate the performance of deep multi-task learning models trained for all emotions at once in a unified architecture and get encouraging results. Experiments performed on evaluating correlation between emotion pairs offer interesting insights into the relationship between them.},
  url       = {http://www.aclweb.org/anthology/C18-1247}
}

@InProceedings{cachola-EtAl:2018:C18-1,
  author    = {Cachola, Isabel  and  Holgate, Eric  and  Preo{\c{t}}iuc-Pietro, Daniel  and  Li, Junyi Jessy},
  title     = {Expressively vulgar: The socio-dynamics of vulgarity and its effects on sentiment analysis in social media},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = aug,
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2927--2938},
  abstract  = {Vulgarity is a common linguistic expression and is used to perform several linguistic functions. Understanding their usage can aid both linguistic and psychological phenomena as well as benefit downstream natural language processing applications such as sentiment analysis. This study performs a large-scale, data-driven empirical analysis of vulgar words using social media data. We analyze the socio-cultural and pragmatic aspects of vulgarity using tweets from users with known demographics. Further, we collect sentiment ratings for vulgar tweets to study the relationship between the use of vulgar words and perceived sentiment and show that explicitly modeling vulgar words can boost sentiment analysis performance.},
  url       = {http://www.aclweb.org/anthology/C18-1248}
}

@InProceedings{howell-zamaraeva:2018:C18-1,
  author    = {Howell, Kristen  and  Zamaraeva, Olga},
  title     = {Clausal Modifiers in the {Grammar Matrix}},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = aug,
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2939--2952},
  abstract  = {We extend the coverage of an existing grammar customization system to clausal modifiers, also referred to as adverbial clauses. We present an analysis, taking a typologically-driven approach to account for this phenomenon across the world’s languages, which we implement in the Grammar Matrix customization system (Bender et al., 2002, 2010). Testing our analysis on testsuites from five genetically and geographically diverse languages that were not considered in development, we achieve 88.4\% coverage and 1.5\% overgeneration.},
  url       = {http://www.aclweb.org/anthology/C18-1249}
}

@InProceedings{yu-liu:2018:C18-1,
  author    = {Yu, Zeping  and  Liu, Gongshen},
  title     = {Sliced Recurrent Neural Networks},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = aug,
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2953--2964},
  abstract  = {Recurrent neural networks have achieved great success in many NLP tasks. However, they have difficulty in parallelization because of the recurrent structure, so it takes much time to train RNNs. In this paper, we introduce sliced recurrent neural networks (SRNNs), which could be parallelized by slicing the sequences into many subsequences. SRNNs have the ability to obtain high-level information through multiple layers with few extra parameters. We prove that the standard RNN is a special case of the SRNN when we use linear activation functions. Without changing the recurrent units, SRNNs are 136 times as fast as standard RNNs and could be even faster when we train longer sequences. Experiments on six large-scale sentiment analysis datasets show that SRNNs achieve better performance than standard RNNs.},
  url       = {http://www.aclweb.org/anthology/C18-1250}
}

@InProceedings{changpinyo-hu-sha:2018:C18-1,
  author    = {Changpinyo, Soravit  and  Hu, Hexiang  and  Sha, Fei},
  title     = {Multi-Task Learning for Sequence Tagging: An Empirical Study},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = aug,
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2965--2977},
  abstract  = {We study three general multi-task learning (MTL) approaches on 11 sequence tagging tasks. Our extensive empirical results show that in about 50\% of the cases, jointly learning all 11 tasks improves upon either independent or pairwise learning of the tasks. We also show that pairwise MTL can inform us what tasks can benefit others or what tasks can be benefited if they are learned jointly. In particular, we identify tasks that can always benefit others as well as tasks that can always be harmed by others. Interestingly, one of our MTL approaches yields embeddings of the tasks that reveal the natural clustering of semantic and syntactic tasks. Our inquiries have opened the doors to further utilization of MTL in NLP.},
  url       = {http://www.aclweb.org/anthology/C18-1251}
}

@InProceedings{moss-leslie-rayson:2018:C18-1,
  author    = {Moss, Henry  and  Leslie, David  and  Rayson, Paul},
  title     = {Using {J-K-fold} Cross Validation To Reduce Variance When Tuning {NLP} Models},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = aug,
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2978--2989},
  abstract  = {K-fold cross validation (CV) is a popular method for estimating the true performance of machine learning models, allowing model selection and parameter tuning. However, the very process of CV requires random partitioning of the data and so our performance estimates are in fact stochastic, with variability that can be substantial for natural language processing tasks. We demonstrate that these unstable estimates cannot be relied upon for effective parameter tuning. The resulting tuned parameters are highly sensitive to how our data is partitioned, meaning that we often select sub-optimal parameter choices and have serious reproducibility issues.},
  url       = {http://www.aclweb.org/anthology/C18-1252}
}

% NOTE(review): the abstract of the next entry ends mid-sentence and looks
% truncated -- restore the full text from the published paper.
@InProceedings{khn:2018:C18-1,
  author    = {K{\"o}hn, Arne},
  title     = {Incremental Natural Language Processing: Challenges, Strategies, and Evaluation},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = aug,
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2990--3003},
  abstract  = {Incrementality is ubiquitous in human-human interaction and beneficial},
  url       = {http://www.aclweb.org/anthology/C18-1253}
}

@InProceedings{moon-EtAl:2018:C18-1,
  author    = {Moon, Lori  and  Christodoulopoulos, Christos  and  Fisher, Cynthia  and  Franco, Sandra  and  Roth, Dan},
  title     = {Gold Standard Annotations for Preposition and Verb Sense with Semantic Role Labels in Adult-Child Interactions},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = aug,
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {3004--3014},
  abstract  = {This paper describes the augmentation of an existing corpus of child-directed speech. The resulting corpus is a gold-standard labeled corpus for supervised learning of semantic role labels in adult-child dialogues. Semantic role labeling (SRL) models assign semantic roles to sentence constituents, thus indicating who has done what to whom (and in what way). The current corpus is derived from the Adam files in the Brown corpus (Brown 1973) of the CHILDES corpora, and augments the partial annotation described in Connor et al. (2010). It provides labels for both semantic arguments of verbs and semantic arguments of prepositions. The semantic role labels and senses of verbs follow Propbank guidelines Kingsbury and Palmer, 2002; Gildea and Palmer 2002; Palmer et al., 2005) and those for prepositions follow Srikumar and Roth (2011). The corpus was annotated by two annotators. Inter-annotator agreement is given separately for prepositions and verbs, and for adult speech and child speech. Overall, across child and adult samples, including verbs and prepositions, the kappa score for sense is 72.6, for the number of semantic-role-bearing arguments, the kappa score is 77.4, for identical semantic role labels on a given argument, the kappa score is 91.1, for the span of semantic role labels, and the kappa for agreement is 93.9. The sense and number of arguments was often open to multiple interpretations in child speech, due to the rapidly changing discourse and omission of constituents in production. Annotators used a discourse context window of ten sentences before and ten sentences after the target utterance to determine the annotation labels. The derived corpus is available for use in CHAT (MacWhinney, 2000) and XML format.},
  url       = {http://www.aclweb.org/anthology/C18-1254}
}

@InProceedings{wang-EtAl:2018:C18-18,
  author    = {Wang, Qiang  and  Li, Fuxue  and  Xiao, Tong  and  Li, Yanyang  and  Li, Yinqiao  and  Zhu, Jingbo},
  title     = {Multi-layer Representation Fusion for Neural Machine Translation},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = aug,
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {3015--3026},
  abstract  = {Neural machine translation systems require a number of stacked layers for deep models. But the prediction depends on the sentence representation of the top-most layer with no access to low-level representations. This makes it more difficult to train the model and poses a risk of information loss to prediction. In this paper, we propose a multi-layer representation fusion (MLRF) approach to fusing stacked layers. In particular, we design three fusion functions to learn a better representation from the stack. Experimental results show that our approach yields improvements of 0.92 and 0.56 BLEU points over the strong Transformer baseline on IWSLT German-English and NIST Chinese-English MT tasks respectively. The result is new state-of-the-art in German-English translation.},
  url       = {http://www.aclweb.org/anthology/C18-1255}
}

@InProceedings{mi-EtAl:2018:C18-1,
  author    = {Mi, Chenggang  and  Yang, Yating  and  Wang, Lei  and  Zhou, Xi  and  Jiang, Tonghai},
  title     = {Toward Better Loanword Identification in {Uyghur} Using Cross-lingual Word Embeddings},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = aug,
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {3027--3037},
  abstract  = {To enrich vocabulary of low resource settings, we proposed a novel method which identify loanwords in monolingual corpora. More specifically, we first use cross-lingual word embeddings as the core feature to generate semantically related candidates based on comparable corpora and a small bilingual lexicon; then, a log-linear model which combines several shallow features such as pronunciation similarity and hybrid language model features to predict the final results. In this paper, we use Uyghur as the receipt language and try to detect loanwords in four donor languages: Arabic, Chinese, Persian and Russian. We conduct two groups of experiments to evaluate the effectiveness of our proposed approach: loanword identification and OOV translation in four language pairs and eight translation directions (Uyghur-Arabic, Arabic-Uyghur, Uyghur-Chinese, Chinese-Uyghur, Uyghur-Persian, Persian-Uyghur, Uyghur-Russian, and Russian-Uyghur). Experimental results on loanword identification show that our method outperforms other baseline models significantly. Neural machine translation models integrating results of loanword identification experiments achieve the best results on OOV translation(with 0.5-0.9 BLEU improvements)},
  url       = {http://www.aclweb.org/anthology/C18-1256}
}

@InProceedings{li-li-zhang:2018:C18-1,
  author    = {Li, Yachao  and  Li, Junhui  and  Zhang, Min},
  title     = {Adaptive Weighting for Neural Machine Translation},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {3038--3048},
  abstract  = {In the popular sequence to sequence (seq2seq) neural machine translation (NMT), there exist many weighted sum models (WSMs), each of which takes a set of input and generates one output. However, the weights in a WSM are independent of each other and fixed for all inputs, suggesting that by ignoring different needs of inputs, the WSM lacks effective control on the influence of each input. In this paper, we propose adaptive weighting for WSMs to control the contribution of each input. Specifically, we apply adaptive weighting for both GRU and the output state in NMT. Experimentation on Chinese-to-English translation and English-to-German translation demonstrates that the proposed adaptive weighting is able to much improve translation accuracy by achieving significant improvement of 1.49 and 0.92 BLEU points for the two translation tasks. Moreover, we discuss in-depth on what type of information is encoded in the encoder and how information influences the generation of target words in the decoder.},
  url       = {http://www.aclweb.org/anthology/C18-1257}
}

@InProceedings{gebhardt:2018:C18-1,
  author    = {Gebhardt, Kilian},
  title     = {Generic refinement of expressive grammar formalisms with an application to discontinuous constituent parsing},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {3049--3063},
  abstract  = {We formulate a generalization of Petrov et al. (2006)’s split/merge algorithm for interpreted regular tree grammars (Koller and Kuhlmann, 2011), which capture a large class of grammar formalisms. We evaluate its effectiveness empirically on the task of discontinuous constituent parsing with two mildly context-sensitive grammar formalisms: linear context-free rewriting systems (Vijay-Shanker et al., 1987) as well as hybrid grammars (Nederhof and Vogler, 2014).},
  url       = {http://www.aclweb.org/anthology/C18-1258}
}

@InProceedings{song-EtAl:2018:C18-1,
  author    = {Song, Kaitao  and  Tan, Xu  and  He, Di  and  Lu, Jianfeng  and  Qin, Tao  and  Liu, Tie-Yan},
  title     = {Double Path Networks for Sequence to Sequence Learning},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {3064--3074},
  abstract  = {Encoder-decoder based Sequence to Sequence learning (S2S) has made remarkable progress in recent years. Different network architectures have been used in the encoder/decoder. Among them, Convolutional Neural Networks (CNN) and Self Attention Networks (SAN) are the prominent ones. The two architectures achieve similar performances but use very different ways to encode and decode context: CNN use convolutional layers to focus on the local connectivity of the sequence, while SAN uses self-attention layers to focus on global semantics. In this work we propose Double Path Networks for Sequence to Sequence learning (DPN-S2S), which leverage the advantages of both models by using double path information fusion. During the encoding step, we develop a double path architecture to maintain the information coming from different paths with convolutional layers and self-attention layers separately. To effectively use the encoded context, we develop a gated attention fusion module and use it to automatically pick up the information needed during the decoding step, which is also a double path network. By deeply integrating the two paths, both types of information are combined and well exploited. Experiments show that our proposed method can significantly improve the performance of sequence to sequence learning over state-of-the-art systems.},
  url       = {http://www.aclweb.org/anthology/C18-1259}
}

@InProceedings{nguyen-EtAl:2018:C18-1,
  author    = {Nguyen, Quy  and  Miyao, Yusuke  and  Noji, Hiroshi  and  Nguyen, Nhung},
  title     = {An Empirical Investigation of Error Types in Vietnamese Parsing},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {3075--3089},
  abstract  = {Syntactic parsing plays a crucial role in improving the quality of natural language processing tasks. Although there have been several research projects on syntactic parsing in Vietnamese, the parsing quality has been far inferior than those reported in major languages, such as English and Chinese. In this work, we evaluated representative constituency parsing models on a Vietnamese Treebank to look for the most suitable parsing method for Vietnamese. We then combined the advantages of automatic and manual analysis to investigate errors produced by the experimented parsers and find the reasons for them. Our analysis focused on three possible sources of parsing errors, namely limited training data, part-of-speech (POS) tagging errors, and ambiguous constructions. As a result, we found that the last two sources, which frequently appear in Vietnamese text, significantly attributed to the poor performance of Vietnamese parsing.},
  url       = {http://www.aclweb.org/anthology/C18-1260}
}

@InProceedings{labeau-allauzen:2018:C18-1,
  author    = {Labeau, Matthieu  and  Allauzen, Alexandre},
  title     = {Learning with Noise-Contrastive Estimation: Easing training by learning to scale},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {3090--3101},
  abstract  = {Noise-Contrastive Estimation (NCE) is a learning criterion that is regularly used to train neural language models in place of Maximum Likelihood Estimation, since it avoids the computational bottleneck caused by the output softmax. In this paper, we analyse and explain some of the weaknesses of this objective function, linked to the mechanism of self-normalization, by closely monitoring comparative experiments. We then explore several remedies and modifications to propose tractable and efficient NCE training strategies. In particular, we propose to make the scaling factor a trainable parameter of the model, and to use the noise distribution to initialize the output bias. These solutions, yet simple, yield stable and competitive performances in either small and large scale language modelling tasks.},
  url       = {http://www.aclweb.org/anthology/C18-1261}
}

@InProceedings{abate-EtAl:2018:C18-1,
  author    = {Abate, Solomon Teferra  and  Melese, Michael  and  Tachbelie, Martha Yifiru  and  Meshesha, Million  and  Atinafu, Solomon  and  Mulugeta, Wondwossen  and  Assibie, Yaregal  and  Abera, Hafte  and  Ephrem, Binyam  and  Abebe, Tewodros  and  Tsegaye, Wondimagegnhue  and  Lemma, Amanuel  and  Andargie, Tsegaye  and  Shifaw, Seifedin},
  title     = {Parallel Corpora for bi-lingual English-Ethiopian Languages Statistical Machine Translation},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {3102--3111},
  abstract  = {In this paper, we describe an attempt towards the development of parallel corpora for English and Ethiopian Languages, such as Amharic, Tigrigna, Afan-Oromo, Wolaytta and Ge'ez. The corpora are used for conducting a bi-directional statistical machine translation experiments. The BLEU scores of the bi-directional Statistical Machine Translation (SMT) systems show a promising result. The morphological richness of the Ethiopian languages has a great impact on the performance of SMT specially when the targets are Ethiopian languages. Now we are working towards an optimal alignment for a bi-directional English-Ethiopian languages SMT.},
  url       = {http://www.aclweb.org/anthology/C18-1262}
}

@InProceedings{blackwood-ballesteros-ward:2018:C18-1,
  author    = {Blackwood, Graeme  and  Ballesteros, Miguel  and  Ward, Todd},
  title     = {Multilingual Neural Machine Translation with Task-Specific Attention},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {3112--3122},
  abstract  = {Multilingual machine translation addresses the task of translating between multiple source and target languages. We propose task-specific attention models, a simple but effective technique for improving the quality of sequence-to-sequence neural multilingual translation. Our approach seeks to retain as much of the parameter sharing generalization of NMT models as possible, while still allowing for language-specific specialization of the attention model to a particular language-pair or task. Our experiments on four languages of the Europarl corpus show that using a target-specific model of attention provides consistent gains in translation quality for all possible translation directions, compared to a model in which all parameters are shared. We observe improved translation quality even in the (extreme) low-resource zero-shot translation directions for which the model never saw explicitly paired parallel data.},
  url       = {http://www.aclweb.org/anthology/C18-1263}
}

@InProceedings{dellert:2018:C18-1,
  author    = {Dellert, Johannes},
  title     = {Combining Information-Weighted Sequence Alignment and Sound Correspondence Models for Improved Cognate Detection},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {3123--3133},
  abstract  = {Methods for automated cognate detection in historical linguistics invariably build on some measure of form similarity which is designed to capture the remaining systematic similarities between cognate word forms after thousands of years of divergence. A wide range of clustering and classification algorithms has been explored for the purpose, whereas possible improvements on the level of pairwise form similarity measures have not been the main focus of research. The approach presented in this paper improves on this core component of cognate detection systems by a novel combination of information weighting, a technique for putting less weight on reoccurring morphological material, with sound correspondence modeling by means of pointwise mutual information. In evaluations on expert cognacy judgments over a subset of the IPA-encoded NorthEuraLex database, the combination of both techniques is shown to lead to considerable improvements in average precision for binary cognate detection, and modest improvements for distance-based cognate clustering.},
  url       = {http://www.aclweb.org/anthology/C18-1264}
}

@InProceedings{passban-way-liu:2018:C18-1,
  author    = {Passban, Peyman  and  Way, Andy  and  Liu, Qun},
  title     = {Tailoring Neural Architectures for Translating from Morphologically Rich Languages},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {3134--3145},
  abstract  = {A morphologically complex word (MCW) is a hierarchical constituent with meaning-preserving subunits, so word-based models which rely on surface forms might not be powerful enough to translate such structures. When translating from morphologically rich languages (MRLs), a source word could be mapped to several words or even a full sentence on the target side, which means an MCW should not be treated as an atomic unit. In order to provide better translations for MRLs, we boost the existing neural machine translation (NMT) architecture with a double-channel encoder and a double-attentive decoder. The main goal targeted in this research is to provide richer information on the encoder side and redesign the decoder accordingly to benefit from such information. Our experimental results demonstrate that we could achieve our goal as the proposed model outperforms existing subword- and character-based architectures and showed significant improvements on translating from German, Russian, and Turkish into English.},
  url       = {http://www.aclweb.org/anthology/C18-1265}
}

@InProceedings{ive-blain-specia:2018:C18-1,
  author    = {Ive, Julia  and  Blain, Frédéric  and  Specia, Lucia},
  title     = {deepQuest: A Framework for Neural-based Quality Estimation},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {3146--3157},
  abstract  = {Predicting Machine Translation (MT) quality can help in many practical tasks such as MT post-editing. The performance of Quality Estimation (QE) methods has drastically improved recently with the introduction of neural approaches to the problem. However, thus far neural approaches have only been designed for word and sentence-level prediction.},
  url       = {http://www.aclweb.org/anthology/C18-1266}
}

@InProceedings{kabbach-ribeyre-herbelot:2018:C18-1,
  author    = {Kabbach, Alexandre  and  Ribeyre, Corentin  and  Herbelot, Aurélie},
  title     = {Butterfly Effects in Frame Semantic Parsing: impact of data processing on model ranking},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {3158--3169},
  abstract  = {Knowing the state-of-the-art for a particular task is an essential component of any computational linguistics investigation. But can we be truly confident that the current state-of-the-art is indeed the best performing model? In this paper, we study the case of frame semantic parsing, a well-established task with multiple shared datasets. We show that in spite of all the care taken to provide a standard evaluation resource, small variations in data processing can have dramatic consequences for ranking parser performance. This leads us to propose an open-source standardized processing pipeline, which can be shared and reused for robust model comparison.},
  url       = {http://www.aclweb.org/anthology/C18-1267}
}

@InProceedings{sadeghi-scheutz:2018:C18-1,
  author    = {Sadeghi, Sepideh  and  Scheutz, Matthias},
  title     = {Sensitivity to Input Order: Evaluation of an Incremental and Memory-Limited Bayesian Cross-Situational Word Learning Model},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {3170--3180},
  abstract  = {We present a variation of the incremental and memory-limited algorithm in (Sadeghi et al., 2017) for Bayesian cross-situational word learning and evaluate the model in terms of its functional performance and its sensitivity to input order. We show that the functional performance of our sub-optimal model on corpus data is close to that of its optimal counterpart (Frank et al., 2009),},
  url       = {http://www.aclweb.org/anthology/C18-1268}
}

@InProceedings{zhang-xiong:2018:C18-1,
  author    = {Zhang, Shiqi  and  Xiong, Deyi},
  title     = {Sentence Weighting for Neural Machine Translation Domain Adaptation},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {3181--3190},
  abstract  = {In this paper, we propose a new sentence weighting method for the domain adaptation of neural machine translation. We introduce a domain similarity metric to evaluate the relevance between a sentence and an available entire domain dataset. The similarity of each sentence to the target domain is calculated with various methods. The computed similarity is then integrated into the training objective to weight sentences. The adaptation results on both IWSLT Chinese-English TED task and a task with only synthetic training parallel data show that our sentence weighting method is able to achieve a significant improvement over strong baselines.},
  url       = {http://www.aclweb.org/anthology/C18-1269}
}

@InProceedings{aufrant-wisniewski-yvon:2018:C18-1,
  author    = {Aufrant, Lauriane  and  Wisniewski, Guillaume  and  Yvon, François},
  title     = {Quantifying training challenges of dependency parsers},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {3191--3202},
  abstract  = {Not all dependencies are equal when training a dependency parser: some are straightforward enough to be learned with only a sample of data, others embed more complexity. This work introduces a series of metrics to quantify those differences, and thereby to expose the shortcomings of various parsing algorithms and strategies. Apart from a more thorough comparison of parsing systems, these new tools also prove useful for characterizing the information conveyed by cross-lingual parsers, in a quantitative but still interpretable way.},
  url       = {http://www.aclweb.org/anthology/C18-1270}
}

@InProceedings{li-EtAl:2018:C18-13,
  author    = {Li, Zuchao  and  Cai, Jiaxun  and  He, Shexia  and  Zhao, Hai},
  title     = {Seq2seq Dependency Parsing},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {3203--3214},
  abstract  = {This paper presents a sequence to sequence (seq2seq) dependency parser by directly predicting the relative position of head for each given word, which therefore results in a truly end-to-end seq2seq dependency parser for the first time. Enjoying the advantage of seq2seq modeling, we enrich a series of embedding enhancement, including firstly introduced subword and node2vec augmentation. Meanwhile, we propose a beam search decoder with tree constraint and subroot decomposition over the sequence to furthermore enhance our seq2seq parser. Our parser is evaluated on benchmark treebanks, being on par with the state-of-the-art parsers by achieving 94.11\% UAS on PTB and 88.78\% UAS on CTB, respectively.},
  url       = {http://www.aclweb.org/anthology/C18-1271}
}

@InProceedings{kdr-EtAl:2018:C18-1,
  author    = {Kádár, Ákos  and  Côté, Marc-Alexandre  and  Chrupała, Grzegorz  and  Alishahi, Afra},
  title     = {Revisiting the Hierarchical Multiscale LSTM},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {3215--3227},
  abstract  = {Hierarchical Multiscale LSTM (Chung et al., 2016) is a state-of-the-art language model that learns interpretable structure from character-level input. Such models can provide fertile ground for (cognitive) computational linguistics studies. However, the high complexity of the architecture, training and implementations might hinder its applicability. We provide a detailed reproduction and ablation study of the architecture, shedding light on some of the potential caveats of re-purposing complex deep-learning architectures. We further show that simplifying certain aspects of the architecture can in fact improve its performance. We also investigate the linguistic units (segments) learned by various levels of the model, and argue that their quality does not correlate with the overall performance of the model on language modeling.},
  url       = {http://www.aclweb.org/anthology/C18-1272}
}

@InProceedings{lee-EtAl:2018:C18-12,
  author    = {Lee, Chanhee  and  Kim, Young-Bum  and  Lee, Dongyub  and  Lim, Heuiseok},
  title     = {Character-Level Feature Extraction with Densely Connected Networks},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {3228--3239},
  abstract  = {Generating character-level features is an important step for achieving good results in various natural language processing tasks. To alleviate the need for human labor in generating hand-crafted features, methods that utilize neural architectures such as Convolutional Neural Network (CNN) or Recurrent Neural Network (RNN) to automatically extract such features have been proposed and have shown great results. However, CNN generates position-independent features, and RNN is slow since it needs to process the characters sequentially. In this paper, we propose a novel method of using a densely connected network to automatically extract character-level features. The proposed method does not require any language or task specific assumptions, and shows robustness and effectiveness while being faster than CNN- or RNN-based methods. Evaluating this method on three sequence labeling tasks - slot tagging, Part-of-Speech (POS) tagging, and Named-Entity Recognition (NER) - we obtain state-of-the-art performance with a 96.62 F1-score and 97.73\% accuracy on slot tagging and POS tagging, respectively, and comparable performance to the state-of-the-art 91.13 F1-score on NER.},
  url       = {http://www.aclweb.org/anthology/C18-1273}
}

@InProceedings{ugawa-EtAl:2018:C18-1,
  author    = {Ugawa, Arata  and  Tamura, Akihiro  and  Ninomiya, Takashi  and  Takamura, Hiroya  and  Okumura, Manabu},
  title     = {Neural Machine Translation Incorporating Named Entity},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {3240--3250},
  abstract  = {This study proposes a new neural machine translation (NMT) model based on the encoder-decoder model that incorporates named entity (NE) tags of source-language sentences. Conventional NMT models have two problems enumerated as follows: (i) they tend to have difficulty in translating words with multiple meanings because of the high ambiguity, and (ii) these models’ ability to translate compound words seems challenging because the encoder receives a word, a part of the compound word, at each time step. To alleviate these problems, the encoder of the proposed model encodes the input word on the basis of its NE tag at each time step, which could reduce the ambiguity of the input word. Furthermore, the encoder introduces a chunk-level LSTM layer over a word-level LSTM layer and hierarchically encodes a source-language sentence to capture a compound NE as a chunk on the basis of the NE tags. We evaluate the proposed model on an English-to-Japanese translation task with the ASPEC, and English-to-Bulgarian and English-to-Romanian translation tasks with the Europarl corpus. The evaluation results show that the proposed model achieves up to 3.11 point improvement in BLEU.},
  url       = {http://www.aclweb.org/anthology/C18-1274}
}

@InProceedings{gupta-EtAl:2018:C18-12,
  author    = {Gupta, Abhirut  and  Ray, Anupama  and  Dasgupta, Gargi  and  Singh, Gautam  and  Aggarwal, Pooja  and  Mohapatra, Prateeti},
  title     = {Semantic Parsing for Technical Support Questions},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {3251--3259},
  abstract  = {Technical support problems are very complex. In contrast to regular web queries (that contain few keywords) or factoid questions (which are a few sentences), these problems usually include attributes like a detailed description of what is failing (symptom), steps taken in an effort to remediate the failure (activity), and sometimes a specific request or ask (intent). Automating support is the task of automatically providing answers to these problems given a corpus of solution documents. Traditional approaches to this task rely on information retrieval and are keyword based; looking for keyword overlap between the question and solution documents and ignoring these attributes. We present an approach for semantic parsing of technical questions that uses grammatical structure to extract these attributes as a baseline, and a CRF based model that can improve performance considerably in the presence of annotated data for training. We also demonstrate that combined with reasoning, these attributes help outperform retrieval baselines.},
  url       = {http://www.aclweb.org/anthology/C18-1275}
}

@InProceedings{lin-EtAl:2018:C18-1,
  author    = {Lin, Junyang  and  Sun, Xu  and  Ren, Xuancheng  and  Ma, Shuming  and  Su, Jinsong  and  Su, Qi},
  title     = {Deconvolution-Based Global Decoding for Neural Machine Translation},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {3260--3271},
  abstract  = {A great proportion of sequence-to-sequence (Seq2Seq) models for Neural Machine Translation (NMT) adopt Recurrent Neural Network (RNN) to generate translation word by word following a sequential order. As the studies of linguistics have proved that language is not linear word sequence but sequence of complex structure, translation at each step should be conditioned on the whole target-side context. To tackle the problem, we propose a new NMT model that decodes the sequence with the guidance of its structural prediction of the context of the target sequence. Our model generates translation based on the structural prediction of the target-side context so that the translation can be freed from the bind of sequential order. Experimental results demonstrate that our model is more competitive compared with the state-of-the-art methods, and the analysis reflects that our model is also robust to translating sentences of different lengths and it also reduces repetition with the instruction from the target-side context for decoding.},
  url       = {http://www.aclweb.org/anthology/C18-1276}
}

@InProceedings{hao-EtAl:2018:C18-1,
  author    = {Hao, Yanchao  and  Liu, Hao  and  He, Shizhu  and  Liu, Kang  and  Zhao, Jun},
  title     = {Pattern-revising Enhanced Simple Question Answering over Knowledge Bases},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {3272--3282},
  abstract  = {Question Answering over Knowledge Bases (KB-QA), which automatically answer natural language questions based on the facts contained by a knowledge base, is one of the most important natural language processing (NLP) tasks. Simple questions constitute a large part of questions queried on the web, still being a challenge to QA systems.},
  url       = {http://www.aclweb.org/anthology/C18-1277}
}

@InProceedings{tayyarmadabushi-lee-barnden:2018:C18-1,
  author    = {Tayyar Madabushi, Harish  and  Lee, Mark  and  Barnden, John},
  title     = {Integrating Question Classification and Deep Learning for improved Answer Selection},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {3283--3294},
  abstract  = {We present a system for Answer Selection that integrates fine-grained Question Classification with a Deep Learning model designed for Answer Selection. We detail the necessary changes to the Question Classification taxonomy and system, the creation of a new Entity Identification system and methods of highlighting entities to achieve this objective. Our experiments show that Question Classes are a strong signal to Deep Learning models for Answer Selection, and enable us to outperform the current state of the art in all variations of our experiments except one. In the best configuration, our MRR and MAP scores outperform the current state of the art by between 3 and 5 points on both versions of the TREC Answer Selection test set, a standard dataset for this task.},
  url       = {http://www.aclweb.org/anthology/C18-1278}
}

@InProceedings{deng-EtAl:2018:C18-1,
  author    = {Deng, Yang  and  Shen, Ying  and  Yang, Min  and  Li, Yaliang  and  Du, Nan  and  Fan, Wei  and  Lei, Kai},
  title     = {Knowledge as A Bridge: Improving Cross-domain Answer Selection with External Knowledge},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {3295--3305},
  abstract  = {Answer selection is an important but challenging task. Significant progresses have been made in domains where a large amount of labeled training data is available. However, obtaining rich annotated data is a time-consuming and expensive process, creating a substantial barrier for applying answer selection models to a new domain which has limited labeled data. In this paper, we propose Knowledge-aware Attentive Network (KAN), a transfer learning framework for cross-domain answer selection, which uses the knowledge base as a bridge to enable knowledge transfer from the source domain to the target domains. Specifically, we design a knowledge module to integrate the knowledge-based representational learning into answer selection models. The learned knowledge-based representations are shared by source and target domains, which not only leverages large amounts of cross-domain data, but also benefits from a regularization effect that leads to more general representations to help tasks in new domains. To verify the effectiveness of our model, we use SQuAD-T dataset as the source domain and three other datasets (i.e., Yahoo QA, TREC QA and InsuranceQA) as the target domains. The experimental results demonstrate that KAN has remarkable applicability and generality, and consistently outperforms the strong competitors by a noticeable margin for cross-domain answer selection.},
  url       = {http://www.aclweb.org/anthology/C18-1279}
}

@inproceedings{sorokin-gurevych:2018:C18-1,
  author = {Sorokin, Daniil  and  Gurevych, Iryna},
  title = {Modeling Semantics with Gated Graph Neural Networks for Knowledge Base Question Answering},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  year = {2018},
  month = {August},
  address = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages = {3306--3317},
  url = {http://www.aclweb.org/anthology/C18-1280},
  abstract = {The most approaches to Knowledge Base Question Answering are based on semantic parsing. In this paper, we address the problem of learning vector representations for complex semantic parses that consist of multiple entities and relations. Previous work largely focused on selecting the correct semantic relations for a question and disregarded the structure of the semantic parse: the connections between entities and the directions of the relations. We propose to use Gated Graph Neural Networks to encode the graph structure of the semantic parse. We show on two data sets that the graph networks outperform all baseline models that do not explicitly model the structure. The error analysis confirms that our approach can successfully process complex semantic parses.},
}

@inproceedings{amidei-piwek-willis:2018:C18-1,
  author = {Amidei, Jacopo  and  Piwek, Paul  and  Willis, Alistair},
  title = {Rethinking the Agreement in Human Evaluation Tasks},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  year = {2018},
  month = {August},
  address = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages = {3318--3329},
  url = {http://www.aclweb.org/anthology/C18-1281},
  abstract = {Human evaluations are broadly thought to be more valuable the higher the inter-annotator agreement. In this paper we examine this idea. We will describe our experiments and analysis within the area of Automatic Question Generation. Our experiments show how annotators diverge in language annotation tasks due to a range of ineliminable factors. For this reason, we believe that annotation schemes for natural language generation tasks that are aimed at evaluating language quality need to be treated with great care. In particular, an unchecked focus on reduction of disagreement among annotators runs the danger of creating generation goals that reward output that is more distant from, rather than closer to, natural human-like language. We conclude the paper by suggesting a new approach to the use of the agreement metrics in natural language generation evaluation tasks.},
}

@inproceedings{ghaeini-EtAl:2018:C18-1,
  author = {Ghaeini, Reza  and  Fern, Xiaoli  and  Shahbazi, Hamed  and  Tadepalli, Prasad},
  title = {Dependent Gated Reading for Cloze-Style Question Answering},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  year = {2018},
  month = {August},
  address = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages = {3330--3345},
  url = {http://www.aclweb.org/anthology/C18-1282},
  abstract = {We present a novel deep learning architecture to address the cloze-style question answering task. Existing approaches employ reading mechanisms that do not fully exploit the interdependency between the document and the query. In this paper, we propose a novel \emph{dependent gated reading} bidirectional GRU network (DGR) to efficiently model the relationship between the document and the query during encoding and decision making. Our evaluation shows that DGR obtains highly competitive performance on well-known machine comprehension benchmarks such as the Children's Book Test (CBT-NE and CBT-CN) and Who DiD What (WDW, Strict and Relaxed). Finally, we extensively analyze and validate our model by ablation and attention studies.},
}

@inproceedings{thorne-vlachos:2018:C18-1,
  author = {Thorne, James  and  Vlachos, Andreas},
  title = {Automated Fact Checking: Task Formulations, Methods and Future Directions},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  year = {2018},
  month = {August},
  address = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages = {3346--3359},
  url = {http://www.aclweb.org/anthology/C18-1283},
  abstract = {The recently increased focus on misinformation has stimulated research in fact checking, the task of assessing the truthfulness of a claim. Research in automating this task has been conducted in a variety of disciplines including natural language processing, machine learning, knowledge representation, databases, and journalism. While there has been substantial progress, relevant papers and articles have been published in research communities that are often unaware of each other and use inconsistent terminology, thus impeding understanding and further progress. In this paper we survey automated fact checking research stemming from natural language processing and related disciplines, unifying the task formulations and methodologies across papers and authors. Furthermore, we highlight the use of evidence as an important distinguishing factor among them cutting across task formulations and methods. We conclude with proposing avenues for future NLP research on automated fact checking.},
}

@InProceedings{dungs-EtAl:2018:C18-1,
  author    = {Dungs, Sebastian  and  Aker, Ahmet  and  Fuhr, Norbert  and  Bontcheva, Kalina},
  title     = {Can Rumour Stance Alone Predict Veracity?},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {3360--3370},
  abstract  = {Prior manual studies of rumours suggested that crowd stance can give insights into the actual rumour veracity. Even though numerous studies of automatic veracity classification of social media rumours have been carried out, none explored the effectiveness of leveraging crowd stance to determine veracity. We use stance as an additional feature to those commonly used in earlier studies. We also model the veracity of a rumour using variants of Hidden Markov Models (HMM) and the collective stance information. This paper demonstrates that HMMs that use stance and tweets' times as the only features for modelling true and false rumours achieve F1 scores in the range of 80\%, outperforming those approaches where stance is used jointly with content and user based features.},
  url       = {http://www.aclweb.org/anthology/C18-1284}
}

@inproceedings{desarkar-yang-mukherjee:2018:C18-1,
  author = {De Sarkar, Sohan  and  Yang, Fan  and  Mukherjee, Arjun},
  title = {Attending Sentences to detect Satirical Fake News},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  year = {2018},
  month = {August},
  address = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages = {3371--3380},
  url = {http://www.aclweb.org/anthology/C18-1285},
  abstract = {Satirical news detection is important in order to prevent the spread of misinformation over the Internet. Existing approaches to capture news satire use machine learning models such as SVM and hierarchical neural networks along with hand-engineered features, but do not explore sentence and document difference. This paper proposes a robust, hierarchical deep neural network approach for satire detection, which is capable of capturing satire both at the sentence level and at the document level. The architecture incorporates pluggable generic neural networks like CNN, GRU, and LSTM. Experimental results on real world news satire dataset show substantial performance gains demonstrating the effectiveness of our proposed approach. An inspection of the learned models reveals the existence of key sentences that control the presence of satire in news.},
}

@inproceedings{sasaki-EtAl:2018:C18-1,
  author = {Sasaki, Akira  and  Hanawa, Kazuaki  and  Okazaki, Naoaki  and  Inui, Kentaro},
  title = {Predicting Stances from Social Media Posts using Factorization Machines},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  year = {2018},
  month = {August},
  address = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages = {3381--3390},
  url = {http://www.aclweb.org/anthology/C18-1286},
  abstract = {Social media provide platforms to express, discuss, and shape opinions about events and issues in the real world.},
}

@InProceedings{prezrosas-EtAl:2018:C18-1,
  author    = {P{\'e}rez-Rosas, Ver{\'o}nica  and  Kleinberg, Bennett  and  Lefevre, Alexandra  and  Mihalcea, Rada},
  title     = {Automatic Detection of Fake News},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {3391--3401},
  abstract  = {The proliferation of misleading information in everyday access media outlets such as social media feeds, news blogs, and online newspapers have made it challenging to identify trustworthy news sources, thus increasing the need for computational tools able to provide insights into the reliability of online content. In this paper, we focus on the automatic identification of fake content in online news. Our contribution is twofold. First, we introduce two novel datasets for the task of fake news detection, covering seven different news domains. We describe the collection, annotation, and validation process in detail and present several exploratory analyses on the identification of linguistic differences in fake and legitimate news content. Second, we conduct a set of learning experiments to build accurate fake news detectors, and show that we can achieve accuracies of up to 76\%. In addition, we provide comparative analyses of the automatic and manual identification of fake news.},
  url       = {http://www.aclweb.org/anthology/C18-1287}
}

@inproceedings{kochkina-liakata-zubiaga:2018:C18-1,
  author = {Kochkina, Elena  and  Liakata, Maria  and  Zubiaga, Arkaitz},
  title = {All-in-one: Multi-task Learning for Rumour Verification},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  year = {2018},
  month = {August},
  address = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages = {3402--3413},
  url = {http://www.aclweb.org/anthology/C18-1288},
  abstract = {Automatic resolution of rumours is a challenging task that can be broken down into smaller components that make up a pipeline, including rumour detection, rumour tracking and stance classification, leading to the final outcome of determining the veracity of a rumour. In previous work, these steps in the process of rumour verification have been developed as separate components where the output of one feeds into the next. We propose a multi-task learning approach that allows joint training of the main and auxiliary tasks, improving the performance of rumour verification. We examine the connection between the dataset properties and the outcomes of the multi-task learning models used.},
}

@inproceedings{groth-EtAl:2018:C18-1,
  author = {Groth, Paul  and  Lauruhn, Mike  and  Scerri, Antony  and  Daniel, Jr., Ron},
  title = {Open Information Extraction on Scientific Text: An Evaluation},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  year = {2018},
  month = {August},
  address = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages = {3414--3423},
  url = {http://www.aclweb.org/anthology/C18-1289},
  abstract = {Open Information Extraction (OIE) is the task of the unsupervised creation of structured information from text. OIE is often used as a starting point for a number of downstream tasks including knowledge base construction, relation extraction, and question answering. While OIE methods are targeted at being domain independent, they have been evaluated primarily on newspaper, encyclopedic or general web text. In this article, we evaluate the performance of OIE on scientific texts originating from 10 different disciplines. To do so, we use two state-of-the-art OIE systems using a crowd-sourcing approach. We find that OIE systems perform significantly worse on scientific text than encyclopedic text. We also provide an error analysis and suggest areas of work to reduce errors. Our corpus of sentences and judgments are made available.},
}

@InProceedings{ksarma-sethares:2018:C18-1,
  author    = {K Sarma, Prathusha  and  Sethares, William},
  title     = {Simple Algorithms For Sentiment Analysis On Sentiment Rich, Data Poor Domains},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {3424--3435},
  abstract  = {Standard word embedding algorithms learn vector representations from large corpora of text documents in an unsupervised fashion. However, the quality of word embeddings learned from these algorithms is affected by the size of training data sets. Thus, applications of these algorithms in domains with only moderate amounts of available data is limited. In this paper we introduce an algorithm that learns word embeddings jointly with a classifier. Our algorithm is called SWESA (Supervised Word Embeddings for Sentiment Analysis). SWESA leverages document label information to learn vector representations of words from a modest corpus of text documents by solving an optimization problem that minimizes a cost function with respect to both word embeddings and the weight vector used for classification. Experiments on several real world data sets show that SWESA has superior performance on domains with limited data, when compared to previously suggested approaches to word embeddings and sentiment analysis tasks.},
  url       = {http://www.aclweb.org/anthology/C18-1290}
}

@InProceedings{leeuwenberg-moens:2018:C18-1,
  author    = {Leeuwenberg, Artuur  and  Moens, Marie-Francine},
  title     = {Word-Level Loss Extensions for Neural Temporal Relation Classification},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {3436--3447},
  abstract  = {Unsupervised pre-trained word embeddings are used effectively for many tasks in natural language processing to leverage unlabeled textual data. Often these embeddings are either used as initializations or as fixed word representations for task-specific classification models. In this work, we extend our classification model's task loss with an unsupervised auxiliary loss on the word-embedding level of the model. This is to ensure that the learned word representations contain both task-specific features, learned from the supervised loss component, and more general features learned from the unsupervised loss component. We evaluate our approach on the task of temporal relation extraction, in particular, narrative containment relation extraction from clinical records, and show that continued training of the embeddings on the unsupervised objective together with the task objective gives better task-specific embeddings, and results in an improvement over the state of the art on the THYME dataset, using only a general-domain part-of-speech tagger as linguistic resource.},
  url       = {http://www.aclweb.org/anthology/C18-1291}
}

@InProceedings{yeung-lee:2018:C18-1,
  author    = {Yeung, Chak Yan  and  Lee, John},
  title     = {Personalized Text Retrieval for Learners of {Chinese} as a Foreign Language},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {3448--3455},
  abstract  = {This paper describes a personalized text retrieval algorithm that helps language learners select the most suitable reading material in terms of vocabulary complexity. The user first rates their knowledge of a small set of words, chosen by a graph-based active learning model. The system trains a complex word identification model on this set, and then applies the model to find texts that contain the desired proportion of new, challenging, and familiar vocabulary. In an evaluation on learners of Chinese as a foreign language, we show that this algorithm is effective in identifying simpler texts for low-proficiency learners, and more challenging ones for high-proficiency learners.},
  url       = {http://www.aclweb.org/anthology/C18-1292}
}

@inproceedings{markov-nastase-strapparava:2018:C18-1,
  author = {Markov, Ilia  and  Nastase, Vivi  and  Strapparava, Carlo},
  title = {Punctuation as Native Language Interference},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  year = {2018},
  month = {August},
  address = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages = {3456--3466},
  url = {http://www.aclweb.org/anthology/C18-1293},
  abstract = {In this paper, we describe experiments designed to explore and evaluate the impact of punctuation marks on the task of native language identification. Punctuation is specific to each language, and is part of the indicators that overtly represent the manner in which each language organizes and conveys information. Our experiments are organized in various set-ups: the usual multi-class classification for individual languages, also considering classification by language groups, across different proficiency levels, topics and even cross-corpus. The results support our hypothesis that punctuation marks are persistent and robust indicators of the native language of the author, which do not diminish in influence even when a high proficiency level in a non-native language is achieved.},
}

@InProceedings{zilio-wilkens-fairon:2018:C18-1,
  author    = {Zilio, Leonardo  and  Wilkens, Rodrigo  and  Fairon, C{\'e}drick},
  title     = {Investigating Productive and Receptive Knowledge: A Profile for Second Language Learning},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {3467--3478},
  abstract  = {The literature frequently addresses the differences in receptive and productive vocabulary, but grammar is often left unacknowledged in second language acquisition studies. In this paper, we used two corpora to investigate the divergences in the behavior of pedagogically relevant grammatical structures in reception and production texts. We further improved the divergence scores observed in this investigation by setting a polarity to them that indicates whether there is overuse or underuse of a grammatical structure by language learners. This led to the compilation of a language profile that was later combined with vocabulary and readability features for classifying reception and production texts in three classes: beginner, intermediate, and advanced. The results of the automatic classification task in both production (0.872 of F-measure) and reception (0.942 of F-measure) were comparable to the current state of the art. We also attempted to automatically attribute a score to texts produced by learners, and the correlation results were encouraging, but there is still a good amount of room for improvement in this task. The developed language profile will serve as input for a system that helps language learners to activate more of their passive knowledge in writing texts.},
  url       = {http://www.aclweb.org/anthology/C18-1294}
}

@InProceedings{chu-otani-nakashima:2018:C18-1,
  author    = {Chu, Chenhui  and  Otani, Mayu  and  Nakashima, Yuta},
  title     = {{iParaphrasing}: Extracting Visually Grounded Paraphrases via an Image},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {3479--3492},
  abstract  = {A paraphrase is a restatement of the meaning of a text in other words. Paraphrases have been},
  url       = {http://www.aclweb.org/anthology/C18-1295}
}

@InProceedings{jiang-EtAl:2018:C18-13,
  author    = {Jiang, Feng  and  Xu, Sheng  and  Chu, Xiaomin  and  Li, Peifeng  and  Zhu, Qiaoming  and  Zhou, Guodong},
  title     = {{MCDTB}: A Macro-level {Chinese} Discourse {TreeBank}},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {3493--3504},
  abstract  = {In view of the differences between the annotations of micro and macro discourse relationships, this paper describes the relevant experiments on the construction of the Macro Chinese Discourse Treebank (MCDTB), a higher-level Chinese discourse corpus. Following RST (Rhetorical Structure Theory), we annotate the macro discourse information, including discourse structure, nuclearity and relationship, and the additional discourse information, including topic sentences, lead and abstract, to make the macro discourse annotation more objective and accurate. Finally, we annotated 720 articles with a Kappa value greater than 0.6. Preliminary experiments on this corpus verify the computability of MCDTB.},
  url       = {http://www.aclweb.org/anthology/C18-1296}
}

@inproceedings{srinivasan-EtAl:2018:C18-1,
  author = {Srinivasan, Balaji Vasan  and  Maneriker, Pranav  and  Krishna, Kundan  and  Modani, Natwar},
  title = {Corpus-based Content Construction},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  year = {2018},
  month = {August},
  address = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages = {3505--3515},
  url = {http://www.aclweb.org/anthology/C18-1297},
  abstract = {Enterprise content writers are engaged in writing textual content for various purposes. Often, the text being written may already be present in the enterprise corpus in the form of past articles and can be re-purposed for the current needs. In the absence of suitable tools, authors manually curate/create such content (sometimes from scratch) which reduces their productivity. To address this, we propose an automatic approach to generate an initial version of the author's intended text based on an input content snippet. Starting with a set of extracted textual fragments related to the snippet based on the query words in it, the proposed approach builds the desired text from these fragment by simultaneously optimizing the information coverage, relevance, diversity and coherence in the generated content. Evaluations on standard datasets shows improved performance against existing baselines on several metrics.},
}

@InProceedings{roesiger-riester-kuhn:2018:C18-1,
  author    = {Roesiger, Ina  and  Riester, Arndt  and  Kuhn, Jonas},
  title     = {Bridging resolution: Task definition, corpus resources and rule-based experiments},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {3516--3528},
  abstract  = {Recent work on bridging resolution has so far been based on the corpus ISNotes (Markert et al. 2012), as this was the only corpus available with unrestricted bridging annotation. Hou et al. 2014's rule-based system currently achieves state-of-the-art performance on this corpus, as learning-based approaches suffer from the lack of available training data.},
  url       = {http://www.aclweb.org/anthology/C18-1298}
}

@inproceedings{wang-EtAl:2018:C18-19,
  author = {Wang, Feng  and  Chen, Wei  and  Yang, Zhen  and  Dong, Qianqian  and  Xu, Shuang  and  Xu, Bo},
  title = {Semi-Supervised Disfluency Detection},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  year = {2018},
  month = {August},
  address = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages = {3529--3538},
  url = {http://www.aclweb.org/anthology/C18-1299},
  abstract = {While the disfluency detection has achieved notable success in the past years, it still severely suffers from the data scarcity. To tackle this problem, we propose a novel semi-supervised approach which can utilize large amounts of unlabelled data. In this work, a light-weight neural net is proposed to extract the hidden features based solely on self-attention without any Recurrent Neural Network (RNN) or Convolutional Neural Network (CNN). In addition, we use the unlabelled corpus to enhance the performance. Besides, the Generative Adversarial Network (GAN) training is applied to enforce the similar distribution between the labelled and unlabelled data. The experimental results show that our approach achieves significant improvements over strong baselines.},
}

@InProceedings{mezza-EtAl:2018:C18-1,
  author    = {Mezza, Stefano  and  Cervone, Alessandra  and  Stepanov, Evgeny  and  Tortoreto, Giuliano  and  Riccardi, Giuseppe},
  title     = {{ISO}-Standard Domain-Independent Dialogue Act Tagging for Conversational Agents},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {3539--3551},
  abstract  = {Dialogue Act (DA) tagging is crucial for spoken language understanding systems, as it provides a general representation of speakers' intents, not bound to a particular dialogue system.},
  url       = {http://www.aclweb.org/anthology/C18-1300}
}

@inproceedings{alikhani-stone:2018:C18-1,
  author = {Alikhani, Malihe  and  Stone, Matthew},
  title = {Arrows are the Verbs of Diagrams},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  year = {2018},
  month = {August},
  address = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages = {3552--3563},
  url = {http://www.aclweb.org/anthology/C18-1301},
  abstract = {Arrows are a key ingredient of schematic pictorial communication. This paper investigates the interpretation of arrows through linguistic, crowdsourcing and machine-learning methodology. Our work establishes a novel analogy between arrows and verbs: we advocate representing arrows in terms of qualitatively different structural and semantic frames, and resolving frames to specific interpretations using shallow world knowledge.},
}

@inproceedings{zamaraeva-howell-rhine:2018:C18-1,
  author = {Zamaraeva, Olga  and  Howell, Kristen  and  Rhine, Adam},
  title = {Improving Feature Extraction for Pathology Reports with Precise Negation Scope Detection},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  year = {2018},
  month = {August},
  address = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages = {3564--3575},
  url = {http://www.aclweb.org/anthology/C18-1302},
  abstract = {We use a broad coverage, linguistically precise English Resource Grammar (ERG) to detect negation scope in sentences taken from pathology reports. We show that incorporating this information in feature extraction has a positive effect on classification of the reports with respect to cancer laterality compared with NegEx, a commonly used tool for negation detection. We analyze the differences between NegEx and ERG results on our dataset and how these differences indicate some directions for future work.},
}

@InProceedings{wang-EtAl:2018:C18-110,
  author    = {Wang, Guolong  and  Qin, Zheng  and  Xu, Kaiping  and  Huang, Kai  and  Ye, Shuxiong},
  title     = {Bridge Video and Text with Cascade Syntactic Structure},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {3576--3585},
  abstract  = {We present a video captioning approach that encodes features by progressively completing syntactic structure (LSTM-CSS). To construct basic syntactic structure (i.e., subject, predicate, and object), we use a Conditional Random Field to label semantic representations (i.e., motions, objects). We argue that in order to improve the comprehensiveness of the description, the local features within object regions can be used to generate complementary syntactic elements (e.g., attribute, adverbial). Inspired by redundancy of human receptors, we utilize a Region Proposal Network to focus on the object regions. To model the final temporal dynamics, Recurrent Neural Network with Path Embeddings is adopted. We demonstrate the effectiveness of LSTM-CSS on generating natural sentences: 42.3\% and 28.5\% in terms of BLEU$@$4 and METEOR. Superior performance when compared to state-of-the-art methods are reported on a large video description dataset (i.e., MSR-VTT-2016).},
  url       = {http://www.aclweb.org/anthology/C18-1303}
}

@inproceedings{masumura-EtAl:2018:C18-1,
  author = {Masumura, Ryo  and  Tanaka, Tomohiro  and  Higashinaka, Ryuichiro  and  Masataki, Hirokazu  and  Aono, Yushi},
  title = {Multi-task and Multi-lingual Joint Learning of Neural Lexical Utterance Classification based on Partially-shared Modeling},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  year = {2018},
  month = {August},
  address = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages = {3586--3596},
  url = {http://www.aclweb.org/anthology/C18-1304},
  abstract = {This paper is an initial study on multi-task and multi-lingual joint learning for lexical utterance classification. A major problem in constructing lexical utterance classification modules for spoken dialogue systems is that individual data resources are often limited or unbalanced among tasks and/or languages. Various studies have examined joint learning using neural-network based shared modeling; however, previous joint learning studies focused on either cross-task or cross-lingual knowledge transfer. In order to simultaneously support both multi-task and multi-lingual joint learning, our idea is to explicitly divide state-of-the-art neural lexical utterance classification into language-specific components that can be shared between different tasks and task-specific components that can be shared between different languages. In addition, in order to effectively transfer knowledge between different task data sets and different language data sets, this paper proposes a partially-shared modeling method that possesses both shared components and components specific to individual data sets. We demonstrate the effectiveness of proposed method using Japanese and English data sets with three different lexical utterance classification tasks.},
}

@InProceedings{bai-EtAl:2018:C18-1,
  author    = {Bai, He  and  Zhou, Yu  and  Zhang, Jiajun  and  Zhao, Liang  and  Hwang, Mei-Yuh  and  Zong, Chengqing},
  title     = {Source Critical Reinforcement Learning for Transferring Spoken Language Understanding to a New Language},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {3597--3607},
  abstract  = {To deploy a spoken language understanding (SLU) model to a new language, language transferring is desired to avoid the trouble of acquiring and labeling a new big SLU corpus. An SLU corpus is a monolingual corpus with domain/intent/slot labels. Translating the original SLU corpus into the target language is an attractive strategy. However, SLU corpora consist of plenty of semantic labels (slots), which general-purpose translators cannot handle well, not to mention additional culture differences. This paper focuses on the language transferring task given a small in-domain parallel SLU corpus. The in-domain parallel corpus can be used as the first adaptation on the general translator. But more importantly, we show how to use reinforcement learning (RL) to further adapt the adapted translator, where translated sentences with more proper slot tags receive higher rewards. Our reward is derived from the source input sentence exclusively, unlike reward via actor-critical methods or computing reward with a ground truth target sentence. Hence we can adapt the translator the second time, using the big monolingual SLU corpus from the source language. We evaluate our approach on Chinese to English language transferring for SLU systems. The experimental results show that the generated English SLU corpus via adaptation and reinforcement learning gives us over 97\% in the slot F1 score and over 84\% accuracy in domain classification. It demonstrates the effectiveness of the proposed language transferring method. Compared with naive translation, our proposed method improves domain classification accuracy by relatively 22\%, and the slot filling F1 score by relatively more than 71\%.},
  url       = {http://www.aclweb.org/anthology/C18-1305}
}

@inproceedings{wang-EtAl:2018:C18-111,
    author = {Wang, Zongsheng  and  Bai, Yunzhi  and  Wu, Bowen  and  Xu, Zhen  and  Wang, Zhuoran  and  Wang, Baoxun},
    title = {A Prospective-Performance Network to Alleviate Myopia in Beam Search for Response Generation},
    booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
    year = {2018},
    month = {August},
    pages = {3608--3618},
    address = {Santa Fe, New Mexico, USA},
    publisher = {Association for Computational Linguistics},
    url = {http://www.aclweb.org/anthology/C18-1306},
    abstract = {Generative dialog models usually adopt beam search as the inference method to generate responses. However, small-width beam search only focuses on the limited current optima. This deficiency named as myopic bias ultimately suppresses the diversity and probability of generated responses. Although increasing the beam width mitigates the myopic bias, it also proportionally slows down the inference efficiency. To alleviate the myopic bias in small-width beam search, this paper proposes a Prospective-Performance Network (PPN) to predict the future reward of the given partially-generated response, and the future reward is defined by the expectation of the partial response appearing in the top-ranked responses given by a larger-width beam search. Enhanced by PPN, the decoder can promote the results with great potential during the beam search phase. The experimental results on both Chinese and English corpora show that our method is promising to increase the quality and diversity of generated responses, with inference efficiency well maintained.},
}

@InProceedings{xing-zhu-zhang:2018:C18-1,
  author    = {Xing, Junjie  and  Zhu, Kenny  and  Zhang, Shaodian},
  title     = {Adaptive Multi-Task Transfer Learning for {Chinese} Word Segmentation in Medical Text},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {3619--3630},
  abstract  = {Chinese word segmentation (CWS) trained from open source corpus faces dramatic performance drop when dealing with domain text, especially for a domain with lots of special terms and diverse writing styles, such as the biomedical domain. However, building domain-specific CWS requires extremely high annotation cost. In this paper, we propose an approach by exploiting domain-invariant knowledge from high resource to low resource domains. Extensive experiments show that our model achieves consistently higher accuracy than the single-task CWS and other transfer learning baselines, especially when there is a large disparity between source and target domains.},
  url       = {http://www.aclweb.org/anthology/C18-1307}
}

@inproceedings{sato-ouchi-tsuboi:2018:C18-1,
    author = {Sato, Motoki  and  Ouchi, Hiroki  and  Tsuboi, Yuta},
    title = {Addressee and Response Selection for Multilingual Conversation},
    booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
    year = {2018},
    month = {August},
    pages = {3631--3644},
    address = {Santa Fe, New Mexico, USA},
    publisher = {Association for Computational Linguistics},
    url = {http://www.aclweb.org/anthology/C18-1308},
    abstract = {Developing conversational systems that can converse in many languages is an interesting challenge for natural language processing.},
}

@inproceedings{liu-mitamura-hovy:2018:C18-1,
    author = {Liu, Zhengzhong  and  Mitamura, Teruko  and  Hovy, Eduard},
    title = {Graph Based Decoding for Event Sequencing and Coreference Resolution},
    booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
    year = {2018},
    month = {August},
    pages = {3645--3657},
    address = {Santa Fe, New Mexico, USA},
    publisher = {Association for Computational Linguistics},
    url = {http://www.aclweb.org/anthology/C18-1309},
    abstract = {Events in text documents are interrelated in complex ways. In this paper, we study two types of relation: Event Coreference and Event Sequencing. We show that the popular tree-like decoding structure for automated Event Coreference is not suitable for Event Sequencing. To this end, we propose a graph-based decoding algorithm that is applicable to both tasks. The new decoding algorithm supports flexible feature sets for both tasks. Empirically, our event coreference system has achieved state-of-the-art performance on the TAC-KBP 2015 event coreference task and our event sequencing system beats a strong temporal-based, oracle-informed baseline. We discuss the challenges of studying these event relations.},
}

@InProceedings{vanmiltenburg-EtAl:2018:C18-1,
  author    = {van Miltenburg, Emiel  and  K{\'a}d{\'a}r, {\'A}kos  and  Koolen, Ruud  and  Krahmer, Emiel},
  title     = {{DIDEC}: The {Dutch} Image Description and Eye-tracking Corpus},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {3658--3669},
  abstract  = {We present a corpus of spoken Dutch image descriptions, paired with two sets of eye-tracking data: Free viewing, where participants look at images without any particular purpose, and Description viewing, where we track eye movements while participants produce spoken descriptions of the images they are viewing. This paper describes the data collection procedure and the corpus itself, and provides an initial analysis of self-corrections in image descriptions. We also present two studies showing the potential of this data. Though these studies mainly serve as an example, we do find two interesting results: (1) the eye-tracking data for the description viewing task is more coherent than for the free-viewing task; (2) variation in image descriptions (also called `image specificity'; Jas and Parikh, 2015) is only moderately correlated across different languages. Our corpus can be used to gain a deeper understanding of the image description task, particularly how visual attention is correlated with the image description process.},
  url       = {http://www.aclweb.org/anthology/C18-1310}
}

@InProceedings{simonson-davis:2018:C18-1,
  author    = {Simonson, Dan  and  Davis, Anthony},
  title     = {Narrative Schema Stability in News Text},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {3670--3680},
  abstract  = {We investigate the stability of narrative schemas (Chambers \& Jurafsky, 2009) automatically induced from a news corpus, representing recurring narratives in a corpus. If such techniques produce meaningful results, we should expect that small changes to the corpus will result in only small changes to the induced schemas. We describe experiments involving successive ablation of a corpus and cross-validation at each stage of ablation, on schemas generated by three different techniques over a general news corpus and topically-specific subcorpora. We also develop a method for evaluating the similarity between sets of narrative schemas, and thus the stability of the schema induction algorithms. This stability analysis affirms the heterogeneous/homogeneous document category hypothesis first presented in Simonson \& Davis (2016), whose technique is problematically limited. Additionally, increased ablation leads to increasing stability, so the smaller the remaining corpus, the more stable schema generation appears to be. We surmise that as a corpus grows larger, novel and more varied narratives continue to appear and stability declines, though at some point this decline levels off as new additions to the corpus consist essentially of ``more of the same.''},
  url       = {http://www.aclweb.org/anthology/C18-1311}
}

@InProceedings{yusupov-kuratov:2018:C18-1,
  author    = {Yusupov, Idris  and  Kuratov, Yurii},
  title     = {{NIPS} Conversational Intelligence Challenge 2017 Winner System: Skill-based Conversational Agent with Supervised Dialog Manager},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {3681--3692},
  abstract  = {We present bot\#1337: a dialog system developed for the 1st NIPS Conversational Intelligence Challenge 2017 (ConvAI). The aim of the competition was to implement a bot capable of conversing with humans based on a given passage of text. To enable conversation, we implemented a set of skills for our bot, including chit-chat, topic detection, text summarization, question answering and question generation. The system has been trained in a supervised setting using a dialogue manager to select an appropriate skill for generating a response. The latter allows a developer to focus on the skill implementation rather than the finite state machine based dialog manager. The proposed system bot\#1337 won the competition with an average dialogue quality score of 2.78 out of 5 given by human evaluators. Source code and trained models for the bot\#1337 are available on GitHub.},
  url       = {http://www.aclweb.org/anthology/C18-1312}
}

@InProceedings{ogorman-EtAl:2018:C18-1,
  author    = {O'Gorman, Tim  and  Regan, Michael  and  Griffitt, Kira  and  Hermjakob, Ulf  and  Knight, Kevin  and  Palmer, Martha},
  title     = {{AMR} Beyond the Sentence: the Multi-sentence {AMR} corpus},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {3693--3702},
  abstract  = {There are few corpora that endeavor to represent the semantic content of entire documents. We present a corpus that accomplishes one way of capturing document level semantics, by annotating coreference and similar phenomena (bridging and implicit roles) on top of gold Abstract Meaning Representations of sentence-level semantics. We present a new corpus of this annotation, with analysis of its quality, alongside a plausible baseline for comparison. It is hoped that this Multi-Sentence AMR corpus (MS-AMR) may become a feasible method for developing rich representations of document meaning, useful for tasks such as information extraction and question answering.},
  url       = {http://www.aclweb.org/anthology/C18-1313}
}

@inproceedings{ji-EtAl:2018:C18-1,
    author = {Ji, Lu  and  Wei, Zhongyu  and  Hu, Xiangkun  and  Liu, Yang  and  Zhang, Qi  and  Huang, Xuanjing},
    title = {Incorporating Argument-Level Interactions for Persuasion Comments Evaluation using Co-attention Model},
    booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
    year = {2018},
    month = {August},
    pages = {3703--3714},
    address = {Santa Fe, New Mexico, USA},
    publisher = {Association for Computational Linguistics},
    url = {http://www.aclweb.org/anthology/C18-1314},
    abstract = {In this paper, we investigate the issue of persuasiveness evaluation for argumentative comments. Most of the existing research explores different text features of reply comments on word level and ignores interactions between participants. In general, viewpoints are usually expressed by multiple arguments and exchanged on argument level. To better model the process of dialogical argumentation, we propose a novel co-attention mechanism based neural network to capture the interactions between participants on argument level. Experimental results on a publicly available dataset show that the proposed model significantly outperforms some state-of-the-art methods for persuasiveness evaluation. Further analysis reveals that attention weights computed in our model are able to extract interactive argument pairs from the original post and the reply.},
}

@inproceedings{shi-EtAl:2018:C18-1,
    author = {Shi, Haoyue  and  Mao, Jiayuan  and  Xiao, Tete  and  Jiang, Yuning  and  Sun, Jian},
    title = {Learning Visually-Grounded Semantics from Contrastive Adversarial Samples},
    booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
    year = {2018},
    month = {August},
    pages = {3715--3727},
    address = {Santa Fe, New Mexico, USA},
    publisher = {Association for Computational Linguistics},
    url = {http://www.aclweb.org/anthology/C18-1315},
    abstract = {We study the problem of grounding distributional representations of texts on the visual domain, namely visual-semantic embeddings (VSE for short). Begin with an insightful adversarial attack on VSE embeddings, we show the limitation of current frameworks and image-text datasets (e.g., MS-COCO) both quantitatively and qualitatively. The large gap between the number of possible constitutions of real-world semantics and the size of parallel data, to a large extent, restricts the model to establish a strong link between textual semantics and visual concepts. We alleviate this problem by augmenting the MS-COCO image captioning datasets with textual contrastive adversarial samples. These samples are synthesized using language priors of human and the WordNet knowledge base, and enforce the model to ground learned embeddings to concrete concepts within the image. This simple but powerful technique brings a noticeable improvement over the baselines on a diverse set of downstream tasks, in addition to defending known-type adversarial attacks. Codes are available at https://github.com/ExplorerFreda/VSE-C.},
}

@inproceedings{li-porco-goldwasser:2018:C18-1,
    author = {Li, Chang  and  Porco, Aldo  and  Goldwasser, Dan},
    title = {Structured Representation Learning for Online Debate Stance Prediction},
    booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
    year = {2018},
    month = {August},
    pages = {3728--3739},
    address = {Santa Fe, New Mexico, USA},
    publisher = {Association for Computational Linguistics},
    url = {http://www.aclweb.org/anthology/C18-1316},
    abstract = {Online debates can help provide valuable information about various perspectives on a wide range of issues. However, understanding the stances expressed in these debates is a highly challenging task, which requires modeling both textual content and users' conversational interactions. Current approaches take a collective classification approach, which ignores the relationships between different debate topics.},
}

@inproceedings{zhang-EtAl:2018:C18-14,
    author = {Zhang, Zhuosheng  and  Li, Jiangtong  and  Zhu, Pengfei  and  Zhao, Hai  and  Liu, Gongshen},
    title = {Modeling Multi-turn Conversation with Deep Utterance Aggregation},
    booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
    year = {2018},
    month = {August},
    pages = {3740--3752},
    address = {Santa Fe, New Mexico, USA},
    publisher = {Association for Computational Linguistics},
    url = {http://www.aclweb.org/anthology/C18-1317},
    abstract = {Multi-turn conversation understanding is a major challenge for building intelligent dialogue systems. This work focuses on retrieval-based response matching for multi-turn conversation whose related work simply concatenates the conversation utterances, ignoring the interactions among previous utterances for context modeling. In this paper, we formulate previous utterances into context using a proposed deep utterance aggregation model to form a fine-grained context representation. In detail, a self-matching attention is first introduced to route the vital information in each utterance. Then the model matches a response with each refined utterance and the final matching score is obtained after attentive turns aggregation. Experimental results show our model outperforms the state-of-the-art methods on three multi-turn conversation benchmarks, including a newly introduced e-commerce dialogue corpus.},
}

@inproceedings{wachsmuth-EtAl:2018:C18-1,
    author = {Wachsmuth, Henning  and  Stede, Manfred  and  El Baff, Roxanne  and  Al Khatib, Khalid  and  Skeppstedt, Maria  and  Stein, Benno},
    title = {Argumentation Synthesis following Rhetorical Strategies},
    booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
    year = {2018},
    month = {August},
    pages = {3753--3765},
    address = {Santa Fe, New Mexico, USA},
    publisher = {Association for Computational Linguistics},
    url = {http://www.aclweb.org/anthology/C18-1318},
    abstract = {Persuasion is rarely achieved through a loose set of arguments alone. Rather, an effective delivery of arguments follows a rhetorical strategy, combining logical reasoning with appeals to ethics and emotion. We argue that such a strategy means to select, arrange, and phrase a set of argumentative discourse units. In this paper, we model rhetorical strategies for the computational synthesis of effective argumentation. In a study, we let 26 experts synthesize argumentative texts with different strategies for 10 topics. We find that the experts agree in the selection significantly more when following the same strategy. While the texts notably vary for different strategies, especially their arrangement remains stable. The results suggest that our model enables a strategical synthesis.},
}

@InProceedings{banerjee-EtAl:2018:C18-1,
  author    = {Banerjee, Suman  and  Moghe, Nikita  and  Arora, Siddhartha  and  Khapra, Mitesh M.},
  title     = {A Dataset for Building Code-Mixed Goal Oriented Conversation Systems},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {3766--3780},
  abstract  = {There is an increasing demand for goal-oriented conversation systems which can assist users in various day-to-day activities such as booking tickets, restaurant reservations, shopping, etc. Most of the existing datasets for building such conversation systems focus on monolingual conversations and there is hardly any work on multilingual and/or code-mixed conversations. Such datasets and systems thus do not cater to the multilingual regions of the world, such as India, where it is very common for people to speak more than one language and seamlessly switch between them resulting in code-mixed conversations. For example, a Hindi speaking user looking to book a restaurant would typically ask, ``Kya tum is restaurant mein ek table book karne mein meri help karoge?'' (``Can you help me in booking a table at this restaurant?''). To facilitate the development of such code-mixed conversation models, we build a goal-oriented dialog dataset containing code-mixed conversations. Specifically, we take the text from the DSTC2 restaurant reservation dataset and create code-mixed versions of it in Hindi-English, Bengali-English, Gujarati-English and Tamil-English. We also establish initial baselines on this dataset using existing state of the art models. This dataset along with our baseline implementations will be made publicly available for research purposes.},
  url       = {http://www.aclweb.org/anthology/C18-1319}
}

@inproceedings{wen-EtAl:2018:C18-1,
    author = {Wen, Haoyang  and  Liu, Yijia  and  Che, Wanxiang  and  Qin, Libo  and  Liu, Ting},
    title = {Sequence-to-Sequence Learning for Task-oriented Dialogue with Dialogue State Representation},
    booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
    year = {2018},
    month = {August},
    pages = {3781--3792},
    address = {Santa Fe, New Mexico, USA},
    publisher = {Association for Computational Linguistics},
    url = {http://www.aclweb.org/anthology/C18-1320},
    abstract = {Classic pipeline models for task-oriented dialogue system require explicit modeling the dialogue states and},
}

@inproceedings{mitra-EtAl:2018:C18-1,
    author = {Mitra, Sayantan  and  Hasanuzzaman, Mohammed  and  Saha, Sriparna  and  Way, Andy},
    title = {Incorporating Deep Visual Features into Multiobjective based Multi-view Search Results Clustering},
    booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
    year = {2018},
    month = {August},
    pages = {3793--3805},
    address = {Santa Fe, New Mexico, USA},
    publisher = {Association for Computational Linguistics},
    url = {http://www.aclweb.org/anthology/C18-1321},
    abstract = {Current paper explores the use of multi-view learning for search result clustering. A web-snippet can be represented using multiple views. Apart from textual view cued by both the semantic and syntactic information, a complimentary view extracted from images contained in the web-snippets is also utilized in the current framework. A single consensus partitioning is finally obtained after consulting these two individual views by the deployment of a multiobjective based clustering technique. Several objective functions including the values of a cluster quality measure measuring the goodness of partitionings obtained using different views and an agreement-disagreement index, quantifying the amount of oneness among multiple views in generating partitionings are optimized simultaneously using AMOSA. In order to detect the number of clusters automatically, concepts of variable length solutions and a vast range of permutation operators are introduced in the clustering process. Finally, a set of alternative partitioning are obtained on the final Pareto front by the proposed multi-view based multiobjective technique. Experimental results by the proposed approach on several benchmark test datasets of SRC with respect to different performance metrics evidently establish the power of visual and text-based views in achieving better search result clustering.},
}

@InProceedings{miura-EtAl:2018:C18-1,
  author    = {Miura, Yasuhide  and  Kano, Ryuji  and  Taniguchi, Motoki  and  Taniguchi, Tomoki  and  Misawa, Shotaro  and  Ohkuma, Tomoko},
  title     = {Integrating Tree Structures and Graph Structures with Neural Networks to Classify Discussion Discourse Acts},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {3806--3818},
  abstract  = {We proposed a model that integrates discussion structures with neural networks to classify discourse acts. Several attempts have been made in earlier works to analyze texts that are used in various discussions. The importance of discussion structures has been explored in those works but their methods required a sophisticated design to combine structural features with a classifier. Our model introduces tree learning approaches and a graph learning approach to directly capture discussion structures without structural features. In an evaluation to classify discussion discourse acts in Reddit, the model achieved improvements of 1.5\% in accuracy and 2.2 in FB1 score compared to the previous best model. We further analyzed the model using an attention mechanism to inspect interactions among different learning approaches.},
  url       = {http://www.aclweb.org/anthology/C18-1322}
}

@InProceedings{ercan-yldz:2018:C18-1,
  author    = {Ercan, G{\"o}khan  and  Y{\i}ld{\i}z, Olcay Taner},
  title     = {{AnlamVer}: Semantic Model Evaluation Dataset for {Turkish} -- Word Similarity and Relatedness},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {3819--3836},
  abstract  = {In this paper, we present AnlamVer, which is a semantic model evaluation dataset for Turkish designed to evaluate word similarity and word relatedness tasks while discriminating those two relations from each other. Our dataset consists of 500 word-pairs annotated by 12 human subjects, and each pair has two distinct scores for similarity and relatedness. Word-pairs are selected to enable the evaluation of distributional semantic models by multiple attributes of words and word-pair relations such as frequency, morphology, concreteness and relation types (e.g., synonymy, antonymy). Our aim is to provide insights to semantic model researchers by evaluating models in multiple attributes. We balance dataset word-pairs by their frequencies to evaluate the robustness of semantic models concerning out-of-vocabulary and rare words problems, which are caused by the rich derivational and inflectional morphology of the Turkish language.},
  url       = {http://www.aclweb.org/anthology/C18-1323}
}

@InProceedings{przepirkowski-patejuk:2018:C18-1,
  author    = {Przepi{\'o}rkowski, Adam  and  Patejuk, Agnieszka},
  title     = {Arguments and Adjuncts in {Universal Dependencies}},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {3837--3852},
  abstract  = {The aim of this paper is to argue for a coherent Universal Dependencies approach to the core vs. non-core distinction. We demonstrate inconsistencies in the current version 2 of UD in this respect -- mostly resulting from the preservation of the argument--adjunct dichotomy despite the declared avoidance of this distinction -- and propose a relatively conservative modification of UD that is free from these problems.},
  url       = {http://www.aclweb.org/anthology/C18-1324}
}

@InProceedings{ruppenhofer-EtAl:2018:C18-1,
  author    = {Ruppenhofer, Josef  and  Wiegand, Michael  and  Wilm, Rebecca  and  Markert, Katja},
  title     = {Distinguishing affixoid formations from compounds},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {3853--3865},
  abstract  = {We study German affixoids, a type of morpheme in between affixes and free stems. Several properties have been associated with them -- increased productivity; a bleached semantics, which is often evaluative and/or intensifying and thus of relevance to sentiment analysis; and the existence of a free morpheme counterpart -- but not been validated empirically. In experiments on a new data set that we make available, we put these key assumptions from the morphological literature to the test and show that despite the fact that affixoids generate many low-frequency formations, we can classify these as affixoid or non-affixoid instances with a best F1-score of 74\%.},
  url       = {http://www.aclweb.org/anthology/C18-1325}
}

@InProceedings{niklaus-EtAl:2018:C18-1,
  author    = {Niklaus, Christina  and  Cetto, Matthias  and  Freitas, Andr{\'e}  and  Handschuh, Siegfried},
  title     = {A Survey on Open Information Extraction},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {3866--3878},
  abstract  = {We provide a detailed overview of the various approaches that were proposed to date to solve the task of Open Information Extraction. We present the major challenges that such systems face, show the evolution of the suggested approaches over time and depict the specific issues they address. In addition, we provide a critique of the commonly applied evaluation procedures for assessing the performance of Open IE systems and highlight some directions for future work.},
  url       = {http://www.aclweb.org/anthology/C18-1326}
}

@inproceedings{yang-liang-zhang:2018:C18-1,
    author = {Yang, Jie  and  Liang, Shuailong  and  Zhang, Yue},
    title = {Design Challenges and Misconceptions in Neural Sequence Labeling},
    booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
    year = {2018},
    month = {August},
    pages = {3879--3889},
    address = {Santa Fe, New Mexico, USA},
    publisher = {Association for Computational Linguistics},
    url = {http://www.aclweb.org/anthology/C18-1327},
    abstract = {We investigate the design challenges of constructing effective and efficient neural sequence labeling systems, by reproducing twelve neural sequence labeling models, which include most of the state-of-the-art structures, and conduct a systematic model comparison on three benchmarks (i.e. NER, Chunking, and POS tagging). Misconceptions and inconsistent conclusions in existing literature are examined and clarified under statistical experiments. In the comparison and analysis process, we reach several practical conclusions which can be useful to practitioners.},
}

@InProceedings{lan-xu:2018:C18-1,
  author    = {Lan, Wuwei and Xu, Wei},
  title     = {Neural Network Models for Paraphrase Identification, Semantic Textual Similarity, Natural Language Inference, and Question Answering},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  year      = {2018},
  month     = {August},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {3890--3902},
  url       = {http://www.aclweb.org/anthology/C18-1328},
  abstract  = {In this paper, we analyze several neural network designs (and their variations) for sentence pair modeling and compare their performance extensively across eight datasets, including paraphrase identification, semantic textual similarity, natural language inference, and question answering tasks. Although most of these models have claimed state-of-the-art performance, the original papers often reported on only one or two selected datasets. We provide a systematic study and show that (i) encoding contextual information by LSTM and inter-sentence interactions are critical, (ii) Tree-LSTM does not help as much as previously claimed but surprisingly improves performance on Twitter datasets, (iii) the Enhanced Sequential Inference Model is the best so far for larger datasets, while the Pairwise Word Interaction Model achieves the best performance when less data is available. We release our implementations as an open-source toolkit.},
}

@InProceedings{thompson-mimno:2018:C18-1,
  author    = {Thompson, Laure  and  Mimno, David},
  title     = {Authorless Topic Models: Biasing Models Away from Known Structure},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {3903--3914},
  abstract  = {Most previous work in unsupervised semantic modeling in the presence of metadata has assumed that our goal is to make latent dimensions more correlated with metadata, but in practice the exact opposite is often true. Some users want topic models that highlight differences between, for example, authors, but others seek more subtle connections across authors. We introduce three metrics for identifying topics that are highly correlated with metadata, and demonstrate that this problem affects between 30 and 50\% of the topics in models trained on two real-world collections, regardless of the size of the model. We find that we can predict which words cause this phenomenon and that by selectively subsampling these words we dramatically reduce topic-metadata correlation, improve topic stability, and maintain or even improve model quality.},
  url       = {http://www.aclweb.org/anthology/C18-1329}
}

@InProceedings{yang-EtAl:2018:C18-13,
  author    = {Yang, Pengcheng  and  Sun, Xu  and  Li, Wei  and  Ma, Shuming  and  Wu, Wei  and  Wang, Houfeng},
  title     = {{SGM}: Sequence Generation Model for Multi-label Classification},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = {August},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {3915--3926},
  abstract  = {Multi-label classification is an important yet challenging task in natural language processing. It is more complex than single-label classification in that the labels tend to be correlated. Existing methods tend to ignore the correlations between labels. Besides, different parts of the text can contribute differently for predicting different labels, which is not considered by existing models. In this paper, we propose to view the multi-label classification task as a sequence generation problem, and apply a sequence generation model with a novel decoder structure to solve it. Extensive experimental results show that our proposed methods outperform previous work by a substantial margin. Further analysis of experimental results demonstrates that the proposed methods not only capture the correlations between labels, but also select the most informative words automatically when predicting different labels.},
  url       = {http://www.aclweb.org/anthology/C18-1330}
}

