<?xml version="1.0" encoding="UTF-8" ?>
<volume id="W17">
  <paper id="5900">
    <title>Proceedings of the 4th Workshop on Natural Language Processing Techniques for Educational Applications (NLPTEA 2017)</title>
    <editor>Yuen-Hsien Tseng</editor>
    <editor>Hsin-Hsi Chen</editor>
    <editor>Lung-Hao Lee</editor>
    <editor>Liang-Chih Yu</editor>
    <month>December</month>
    <year>2017</year>
    <address>Taipei, Taiwan</address>
    <publisher>Asian Federation of Natural Language Processing</publisher>
    <url>http://www.aclweb.org/anthology/W17-59</url>
    <bibtype>book</bibtype>
    <bibkey>NLPTEA:2017</bibkey>
  </paper>

  <paper id="5901">
    <title>NTUCLE: Developing a Corpus of Learner English to Provide Writing Support for Engineering Students</title>
    <author><first>Roger Vivek Placidus</first><last>Winder</last></author>
    <author><first>Joseph</first><last>MacKinnon</last></author>
    <author><first>Shu Yun</first><last>Li</last></author>
    <author><first>Benedict Christopher Tzer Liang</first><last>Lin</last></author>
    <author><first>Carmel Lee Hah</first><last>Heah</last></author>
    <author><first>Lu&#237;s</first><last>Morgado da Costa</last></author>
    <author><first>Takayuki</first><last>Kuribayashi</last></author>
    <author><first>Francis</first><last>Bond</last></author>
    <booktitle>Proceedings of the 4th Workshop on Natural Language Processing Techniques for Educational Applications (NLPTEA 2017)</booktitle>
    <month>December</month>
    <year>2017</year>
    <address>Taipei, Taiwan</address>
    <publisher>Asian Federation of Natural Language Processing</publisher>
    <pages>1&#8211;11</pages>
    <url>http://www.aclweb.org/anthology/W17-5901</url>
    <abstract>This paper describes the creation of a new annotated learner corpus. The aim is
	to use this corpus to develop an automated system for corrective feedback on
	students’ writing. With this system, students will be able to receive timely
	feedback on language errors before they submit their assignments for grading. A
	corpus of assignments submitted by first year engineering students was
	compiled, and a new error tag set for the NTU Corpus of Learner English
	(NTUCLE) was developed based on that of the NUS Corpus of Learner English
	(NUCLE), as well as marking rubrics used at NTU. After a description of the
	corpus, error tag set and annotation process, the paper presents the results of
	the annotation exercise as well as follow up actions. The final error tag set,
	which is significantly larger than that for the NUCLE error categories, is then
	presented before a brief conclusion summarising our experience and future
	plans.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>winder-EtAl:2017:NLPTEA</bibkey>
  </paper>

  <paper id="5902">
    <title>Understanding Non-Native Writings: Can a Parser Help?</title>
    <author><first>Jirka</first><last>Hana</last></author>
    <author><first>Barbora</first><last>Hladka</last></author>
    <booktitle>Proceedings of the 4th Workshop on Natural Language Processing Techniques for Educational Applications (NLPTEA 2017)</booktitle>
    <month>December</month>
    <year>2017</year>
    <address>Taipei, Taiwan</address>
    <publisher>Asian Federation of Natural Language Processing</publisher>
    <pages>12&#8211;16</pages>
    <url>http://www.aclweb.org/anthology/W17-5902</url>
    <abstract>We present a pilot study on parsing non-native texts written by learners of
	Czech. We performed experiments that have shown that at least high-level
	syntactic functions, like subject, predicate, and object, can be assigned based
	on a parser trained on standard native language.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>hana-hladka:2017:NLPTEA</bibkey>
  </paper>

  <paper id="5903">
    <title>Carrier Sentence Selection for Fill-in-the-blank Items</title>
    <author><first>Shu</first><last>Jiang</last></author>
    <author><first>John</first><last>Lee</last></author>
    <booktitle>Proceedings of the 4th Workshop on Natural Language Processing Techniques for Educational Applications (NLPTEA 2017)</booktitle>
    <month>December</month>
    <year>2017</year>
    <address>Taipei, Taiwan</address>
    <publisher>Asian Federation of Natural Language Processing</publisher>
    <pages>17&#8211;22</pages>
    <url>http://www.aclweb.org/anthology/W17-5903</url>
    <abstract>Fill-in-the-blank items are a common form of exercise in computer-assisted
	language learning systems.  To automatically generate an effective item, the
	system must be able to select a high-quality carrier sentence that illustrates
	the usage of the target word.  Previous approaches for carrier sentence
	selection have considered sentence length, vocabulary difficulty, the position
	of the target word and the presence of finite verbs.  This paper investigates
	the utility of word co-occurrence statistics and lexical similarity as
	selection criteria.  In an evaluation on generating fill-in-the-blank items for
	learning Chinese as a foreign language, we show that these two criteria can
	improve carrier sentence quality.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>jiang-lee:2017:NLPTEA</bibkey>
  </paper>

  <paper id="5904">
    <title>Hindi Shabdamitra: A Wordnet based E-Learning Tool for Language Learning and Teaching</title>
    <author><first>Hanumant</first><last>Redkar</last></author>
    <author><first>Sandhya</first><last>Singh</last></author>
    <author><first>Meenakshi</first><last>Somasundaram</last></author>
    <author><first>Dhara</first><last>Gorasia</last></author>
    <author><first>Malhar</first><last>Kulkarni</last></author>
    <author><first>Pushpak</first><last>Bhattacharyya</last></author>
    <booktitle>Proceedings of the 4th Workshop on Natural Language Processing Techniques for Educational Applications (NLPTEA 2017)</booktitle>
    <month>December</month>
    <year>2017</year>
    <address>Taipei, Taiwan</address>
    <publisher>Asian Federation of Natural Language Processing</publisher>
    <pages>23&#8211;28</pages>
    <url>http://www.aclweb.org/anthology/W17-5904</url>
    <abstract>In today's technology driven digital era, education domain is undergoing a
	transformation from traditional approaches to more learner controlled and
	flexible methods of learning. This transformation has opened the new avenues
	for interdisciplinary research in the field of educational technology and
	natural language processing in developing quality digital aids for learning and
	teaching. The tool presented here - Hindi Shabdamitra, developed using Hindi
	Wordnet for Hindi language learning, is one such e-learning tool. It has been
	developed as a teaching and learning aid suitable for formal school based
	curriculum and informal setup for self learning users. Besides vocabulary, it
	also provides word based grammar along with images and pronunciation for better
	learning and retention. This aid demonstrates how a rich lexical resource
	like wordnet can be systematically remodeled for practical usage in the
	educational domain.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>redkar-EtAl:2017:NLPTEA</bibkey>
  </paper>

  <paper id="5905">
    <title>NLPTEA 2017 Shared Task &#8211; Chinese Spelling Check</title>
    <author><first>Gabriel</first><last>Fung</last></author>
    <author><first>Maxime</first><last>Debosschere</last></author>
    <author><first>Dingmin</first><last>Wang</last></author>
    <author><first>Bo</first><last>Li</last></author>
    <author><first>Jia</first><last>Zhu</last></author>
    <author><first>Kam-Fai</first><last>Wong</last></author>
    <booktitle>Proceedings of the 4th Workshop on Natural Language Processing Techniques for Educational Applications (NLPTEA 2017)</booktitle>
    <month>December</month>
    <year>2017</year>
    <address>Taipei, Taiwan</address>
    <publisher>Asian Federation of Natural Language Processing</publisher>
    <pages>29&#8211;34</pages>
    <url>http://www.aclweb.org/anthology/W17-5905</url>
    <abstract>This paper provides an overview along with our findings of the Chinese Spelling
	Check shared task at NLPTEA 2017. The goal of this task is to develop a
	computer-assisted system to automatically diagnose typing errors in traditional
	Chinese sentences written by students. We defined six types of errors which
	belong to two categories. Given a sentence, the system should detect where the
	errors are, and for each detected error determine its type and provide
	correction suggestions. We designed, constructed, and released a benchmark
	dataset for this task.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>fung-EtAl:2017:NLPTEA</bibkey>
  </paper>

  <paper id="5906">
    <title>Chinese Spelling Check based on N-gram and String Matching Algorithm</title>
    <author><first>Jui-Feng</first><last>Yeh</last></author>
    <author><first>Li-Ting</first><last>Chang</last></author>
    <author><first>Chan-Yi</first><last>Liu</last></author>
    <author><first>Tsung-Wei</first><last>Hsu</last></author>
    <booktitle>Proceedings of the 4th Workshop on Natural Language Processing Techniques for Educational Applications (NLPTEA 2017)</booktitle>
    <month>December</month>
    <year>2017</year>
    <address>Taipei, Taiwan</address>
    <publisher>Asian Federation of Natural Language Processing</publisher>
    <pages>35&#8211;38</pages>
    <url>http://www.aclweb.org/anthology/W17-5906</url>
    <abstract>This paper presents a Chinese spelling check approach based on language models
	combined with a string matching algorithm to treat the problems resulting from the
	influence caused by Cantonese mother tone. N-grams are first used to detect the
	probability of sentences constructed by the writers, then a string matching algorithm
	called the Knuth-Morris-Pratt (KMP) Algorithm is used to detect and correct
	the error. According to the experimental results, the proposed approach can
	detect the error and provide the corresponding correction.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>yeh-EtAl:2017:NLPTEA</bibkey>
  </paper>

  <paper id="5907">
    <title>N-gram Model for Chinese Grammatical Error Diagnosis</title>
    <author><first>Jianbo</first><last>Zhao</last></author>
    <author><first>Hao</first><last>Liu</last></author>
    <author><first>Zuyi</first><last>Bao</last></author>
    <author><first>Xiaopeng</first><last>Bai</last></author>
    <author><first>Si</first><last>Li</last></author>
    <author><first>Zhiqing</first><last>Lin</last></author>
    <booktitle>Proceedings of the 4th Workshop on Natural Language Processing Techniques for Educational Applications (NLPTEA 2017)</booktitle>
    <month>December</month>
    <year>2017</year>
    <address>Taipei, Taiwan</address>
    <publisher>Asian Federation of Natural Language Processing</publisher>
    <pages>39&#8211;44</pages>
    <url>http://www.aclweb.org/anthology/W17-5907</url>
    <abstract>Detection and correction of Chinese grammatical errors have been two of the major
	challenges for Chinese automatic grammatical error diagnosis. This paper
	presents an N-gram model for automatic detection and correction of Chinese
	grammatical errors in NLPTEA 2017 task. The experiment results show that the
	proposed method is good at correction of Chinese grammatical errors.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>zhao-EtAl:2017:NLPTEA</bibkey>
  </paper>

  <paper id="5908">
    <title>The Influence of Spelling Errors on Content Scoring Performance</title>
    <author><first>Andrea</first><last>Horbach</last></author>
    <author><first>Yuning</first><last>Ding</last></author>
    <author><first>Torsten</first><last>Zesch</last></author>
    <booktitle>Proceedings of the 4th Workshop on Natural Language Processing Techniques for Educational Applications (NLPTEA 2017)</booktitle>
    <month>December</month>
    <year>2017</year>
    <address>Taipei, Taiwan</address>
    <publisher>Asian Federation of Natural Language Processing</publisher>
    <pages>45&#8211;53</pages>
    <url>http://www.aclweb.org/anthology/W17-5908</url>
    <abstract>Spelling errors occur frequently in educational settings, but their influence
	on automatic scoring is largely unknown.
	We therefore investigate the influence of spelling errors on content scoring
	performance using the example of the ASAP corpus.
	We conduct an annotation study on the nature of spelling errors in the ASAP
	dataset and utilize these finding in machine learning experiments that measure
	the influence of spelling errors on automatic content scoring. Our main finding
	is that scoring methods using both token and character n-gram features are
	robust against spelling errors up to the error frequency in ASAP.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>horbach-ding-zesch:2017:NLPTEA</bibkey>
  </paper>

  <paper id="5909">
    <title>Analyzing the Impact of Spelling Errors on POS-Tagging and Chunking in Learner English</title>
    <author><first>Tomoya</first><last>Mizumoto</last></author>
    <author><first>Ryo</first><last>Nagata</last></author>
    <booktitle>Proceedings of the 4th Workshop on Natural Language Processing Techniques for Educational Applications (NLPTEA 2017)</booktitle>
    <month>December</month>
    <year>2017</year>
    <address>Taipei, Taiwan</address>
    <publisher>Asian Federation of Natural Language Processing</publisher>
    <pages>54&#8211;58</pages>
    <url>http://www.aclweb.org/anthology/W17-5909</url>
    <abstract>Part-of-speech (POS) tagging and chunking have been used in tasks targeting
	learner English;
	however, to the best of our knowledge, few studies have evaluated their
	performance and no studies have revealed the causes of POS-tagging/chunking
	errors in detail.
	Therefore, we investigate performance and analyze the causes of failure. We
	focus on spelling errors that occur frequently in learner English.
	We demonstrate that spelling errors reduced POS-tagging performance by 0.23%,
	and that a spell checker is not necessary for
	POS-tagging/chunking of learner English.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>mizumoto-nagata:2017:NLPTEA</bibkey>
  </paper>

  <paper id="5910">
    <title>Complex Word Identification: Challenges in Data Annotation and System Performance</title>
    <author><first>Marcos</first><last>Zampieri</last></author>
    <author><first>Shervin</first><last>Malmasi</last></author>
    <author><first>Gustavo</first><last>Paetzold</last></author>
    <author><first>Lucia</first><last>Specia</last></author>
    <booktitle>Proceedings of the 4th Workshop on Natural Language Processing Techniques for Educational Applications (NLPTEA 2017)</booktitle>
    <month>December</month>
    <year>2017</year>
    <address>Taipei, Taiwan</address>
    <publisher>Asian Federation of Natural Language Processing</publisher>
    <pages>59&#8211;63</pages>
    <url>http://www.aclweb.org/anthology/W17-5910</url>
    <abstract>This paper revisits the problem of complex word identification (CWI) following
	up the SemEval CWI shared task. We use ensemble classifiers to investigate how
	well computational methods can discriminate between complex and non-complex
	words. Furthermore, we analyze the classification performance to understand
	what makes lexical complexity challenging. Our findings show that most systems
	performed poorly on the SemEval CWI dataset, and one of the reasons for that is
	the way in which human annotation was performed.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>zampieri-EtAl:2017:NLPTEA</bibkey>
  </paper>

  <paper id="5911">
    <title>Suggesting Sentences for ESL using Kernel Embeddings</title>
    <author><first>Kent</first><last>Shioda</last></author>
    <author><first>Mamoru</first><last>Komachi</last></author>
    <author><first>Rue</first><last>Ikeya</last></author>
    <author><first>Daichi</first><last>Mochihashi</last></author>
    <booktitle>Proceedings of the 4th Workshop on Natural Language Processing Techniques for Educational Applications (NLPTEA 2017)</booktitle>
    <month>December</month>
    <year>2017</year>
    <address>Taipei, Taiwan</address>
    <publisher>Asian Federation of Natural Language Processing</publisher>
    <pages>64&#8211;68</pages>
    <url>http://www.aclweb.org/anthology/W17-5911</url>
    <abstract>Sentence retrieval is an important NLP application for English as a Second
	Language (ESL) learners.
	ESL learners are familiar with web search engines, but generic web search
	results may not be adequate for composing documents in a specific domain.
	However, if we build our own search system specialized to a domain, it may be
	subject to the data sparseness problem.
	Recently proposed word2vec partially addresses the data sparseness problem, but
	fails to extract sentences relevant to queries owing to the modeling of the
	latent intent of the query.
	Thus, we propose a method of retrieving example sentences using kernel
	embeddings and N-gram windows.
	This method implicitly models latent intent of query and sentences, and
	alleviates the problem of noisy alignment.
	Our results show that our method achieved higher precision in sentence
	retrieval for ESL in the domain of a university press release corpus, as
	compared to a previous unsupervised method used for a semantic textual
	similarity task.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>shioda-EtAl:2017:NLPTEA</bibkey>
  </paper>

  <paper id="5912">
    <title>Event Timeline Generation from History Textbooks</title>
    <author><first>Harsimran</first><last>Bedi</last></author>
    <author><first>Sangameshwar</first><last>Patil</last></author>
    <author><first>Swapnil</first><last>Hingmire</last></author>
    <author><first>Girish</first><last>Palshikar</last></author>
    <booktitle>Proceedings of the 4th Workshop on Natural Language Processing Techniques for Educational Applications (NLPTEA 2017)</booktitle>
    <month>December</month>
    <year>2017</year>
    <address>Taipei, Taiwan</address>
    <publisher>Asian Federation of Natural Language Processing</publisher>
    <pages>69&#8211;77</pages>
    <url>http://www.aclweb.org/anthology/W17-5912</url>
    <abstract>Event timeline serves as the basic structure of history, and it is used as a
	disposition of key phenomena in studying history as a subject in secondary
	school. In order to enable a student to understand a historical phenomenon as a
	series of connected events, we present a system for automatic event timeline
	generation from history textbooks. Additionally, we propose Message Sequence
	Chart (MSC) and time-map based visualization techniques to visualize an event
	timeline. We also identify key computational challenges in developing natural
	language processing based applications for history textbooks.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>bedi-EtAl:2017:NLPTEA</bibkey>
  </paper>

</volume>

