<?xml version="1.0" encoding="UTF-8" ?>
<volume id="W16">
  <paper id="2600">
    <title>Proceedings of the 10th Web as Corpus Workshop</title>
    <editor>Paul Cook</editor>
    <editor>Stefan Evert</editor>
    <editor>Roland Schäfer</editor>
    <editor>Egon Stemle</editor>
    <month>August</month>
    <year>2016</year>
    <address>Berlin</address>
    <publisher>Association for Computational Linguistics</publisher>
    <url>http://anthology.aclweb.org/W16-26</url>
    <bibtype>book</bibtype>
    <bibkey>WAC-X:2016</bibkey>
  </paper>

  <paper id="2601">
    <title>Automatic Classification by Topic Domain for Meta Data Generation, Web Corpus Evaluation, and Corpus Comparison</title>
    <author><first>Roland</first><last>Sch&#228;fer</last></author>
    <author><first>Felix</first><last>Bildhauer</last></author>
    <booktitle>Proceedings of the 10th Web as Corpus Workshop</booktitle>
    <month>August</month>
    <year>2016</year>
    <address>Berlin</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>1&#8211;6</pages>
    <url>http://anthology.aclweb.org/W16-2601</url>
    <bibtype>inproceedings</bibtype>
    <bibkey>schafer-bildhauer:2016:WAC-X</bibkey>
  </paper>

  <paper id="2602">
    <title>Efficient construction of metadata-enhanced web corpora</title>
    <author><first>Adrien</first><last>Barbaresi</last></author>
    <booktitle>Proceedings of the 10th Web as Corpus Workshop</booktitle>
    <month>August</month>
    <year>2016</year>
    <address>Berlin</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>7&#8211;16</pages>
    <url>http://anthology.aclweb.org/W16-2602</url>
    <bibtype>inproceedings</bibtype>
    <bibkey>barbaresi:2016:WAC-X</bibkey>
  </paper>

  <paper id="2603">
    <title>Topically-focused Blog Corpora for Multiple Languages</title>
    <author><first>Andrew</first><last>Salway</last></author>
    <author><first>Dag</first><last>Elgesem</last></author>
    <author><first>Knut</first><last>Hofland</last></author>
    <author><first>&#216;ystein</first><last>Reigem</last></author>
    <author><first>Lubos</first><last>Steskal</last></author>
    <booktitle>Proceedings of the 10th Web as Corpus Workshop</booktitle>
    <month>August</month>
    <year>2016</year>
    <address>Berlin</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>17&#8211;26</pages>
    <url>http://anthology.aclweb.org/W16-2603</url>
    <bibtype>inproceedings</bibtype>
    <bibkey>salway-EtAl:2016:WAC-X</bibkey>
  </paper>

  <paper id="2604">
    <title>The Challenges and Joys of Analysing Ongoing Language Change in Web-based Corpora: a Case Study</title>
    <author><first>Anne</first><last>Krause</last></author>
    <booktitle>Proceedings of the 10th Web as Corpus Workshop</booktitle>
    <month>August</month>
    <year>2016</year>
    <address>Berlin</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>27&#8211;34</pages>
    <url>http://anthology.aclweb.org/W16-2604</url>
    <bibtype>inproceedings</bibtype>
    <bibkey>krause:2016:WAC-X</bibkey>
  </paper>

  <paper id="2605">
    <title>Using the Web and Social Media as Corpora for Monitoring the Spread of Neologisms. The case of 'rapefugee', 'rapeugee', and 'rapugee'.</title>
    <author><first>Quirin</first><last>W&#252;rschinger</last></author>
    <author><first>Mohammad Fazleh</first><last>Elahi</last></author>
    <author><first>Desislava</first><last>Zhekova</last></author>
    <author><first>Hans-J&#246;rg</first><last>Schmid</last></author>
    <booktitle>Proceedings of the 10th Web as Corpus Workshop</booktitle>
    <month>August</month>
    <year>2016</year>
    <address>Berlin</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>35&#8211;43</pages>
    <url>http://anthology.aclweb.org/W16-2605</url>
    <bibtype>inproceedings</bibtype>
    <bibkey>wurschinger-EtAl:2016:WAC-X</bibkey>
  </paper>

  <paper id="2606">
    <title>EmpiriST 2015: A Shared Task on the Automatic Linguistic Annotation of Computer-Mediated Communication and Web Corpora</title>
    <author><first>Michael</first><last>Bei&#223;wenger</last></author>
    <author><first>Sabine</first><last>Bartsch</last></author>
    <author><first>Stefan</first><last>Evert</last></author>
    <author><first>Kay-Michael</first><last>W&#252;rzner</last></author>
    <booktitle>Proceedings of the 10th Web as Corpus Workshop</booktitle>
    <month>August</month>
    <year>2016</year>
    <address>Berlin</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>44&#8211;56</pages>
    <url>http://anthology.aclweb.org/W16-2606</url>
    <bibtype>inproceedings</bibtype>
    <bibkey>beisswenger-EtAl:2016:WAC-X</bibkey>
  </paper>

  <paper id="2607">
    <title>SoMaJo: State-of-the-art tokenization for German web and social media texts</title>
    <author><first>Thomas</first><last>Proisl</last></author>
    <author><first>Peter</first><last>Uhrig</last></author>
    <booktitle>Proceedings of the 10th Web as Corpus Workshop</booktitle>
    <month>August</month>
    <year>2016</year>
    <address>Berlin</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>57&#8211;62</pages>
    <url>http://anthology.aclweb.org/W16-2607</url>
    <bibtype>inproceedings</bibtype>
    <bibkey>proisl-uhrig:2016:WAC-X</bibkey>
  </paper>

  <paper id="2608">
    <title>UdS-(retrain|distributional|surface): Improving POS Tagging for OOV Words in German CMC and Web Data</title>
    <author><first>Jakob</first><last>Prange</last></author>
    <author><first>Andrea</first><last>Horbach</last></author>
    <author><first>Stefan</first><last>Thater</last></author>
    <booktitle>Proceedings of the 10th Web as Corpus Workshop</booktitle>
    <month>August</month>
    <year>2016</year>
    <address>Berlin</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>63&#8211;71</pages>
    <url>http://anthology.aclweb.org/W16-2608</url>
    <bibtype>inproceedings</bibtype>
    <bibkey>prange-horbach-thater:2016:WAC-X</bibkey>
  </paper>

  <paper id="2609">
    <title>Babler - Data Collection from the Web to Support Speech Recognition and Keyword Search</title>
    <author><first>Gideon</first><last>Mendels</last></author>
    <author><first>Erica</first><last>Cooper</last></author>
    <author><first>Julia</first><last>Hirschberg</last></author>
    <booktitle>Proceedings of the 10th Web as Corpus Workshop</booktitle>
    <month>August</month>
    <year>2016</year>
    <address>Berlin</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>72&#8211;81</pages>
    <url>http://anthology.aclweb.org/W16-2609</url>
    <bibtype>inproceedings</bibtype>
    <bibkey>mendels-cooper-hirschberg:2016:WAC-X</bibkey>
  </paper>

  <paper id="2610">
    <title>A Global Analysis of Emoji Usage</title>
    <author><first>Nikola</first><last>Ljube&#x161;i&#x107;</last></author>
    <author><first>Darja</first><last>Fi&#x161;er</last></author>
    <booktitle>Proceedings of the 10th Web as Corpus Workshop</booktitle>
    <month>August</month>
    <year>2016</year>
    <address>Berlin</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>82&#8211;89</pages>
    <url>http://anthology.aclweb.org/W16-2610</url>
    <bibtype>inproceedings</bibtype>
    <bibkey>ljubevsic-fivser:2016:WAC-X</bibkey>
  </paper>

  <paper id="2611">
    <title>Genre classification for a corpus of academic webpages</title>
    <author><first>Erika</first><last>Dalan</last></author>
    <author><first>Serge</first><last>Sharoff</last></author>
    <booktitle>Proceedings of the 10th Web as Corpus Workshop</booktitle>
    <month>August</month>
    <year>2016</year>
    <address>Berlin</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>90&#8211;98</pages>
    <url>http://anthology.aclweb.org/W16-2611</url>
    <bibtype>inproceedings</bibtype>
    <bibkey>dalan-sharoff:2016:WAC-X</bibkey>
  </paper>

  <paper id="2612">
    <title>On Bias-free Crawling and Representative Web Corpora</title>
    <author><first>Roland</first><last>Sch&#228;fer</last></author>
    <booktitle>Proceedings of the 10th Web as Corpus Workshop</booktitle>
    <month>August</month>
    <year>2016</year>
    <address>Berlin</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>99&#8211;105</pages>
    <url>http://anthology.aclweb.org/W16-2612</url>
    <bibtype>inproceedings</bibtype>
    <bibkey>schafer:2016:WAC-X</bibkey>
  </paper>

  <paper id="2613">
    <title>EmpiriST: AIPHES - Robust Tokenization and POS-Tagging for Different Genres</title>
    <author><first>Steffen</first><last>Remus</last></author>
    <author><first>Gerold</first><last>Hintz</last></author>
    <author><first>Chris</first><last>Biemann</last></author>
    <author><first>Christian M.</first><last>Meyer</last></author>
    <author><first>Darina</first><last>Benikova</last></author>
    <author><first>Judith</first><last>Eckle-Kohler</last></author>
    <author><first>Margot</first><last>Mieskes</last></author>
    <author><first>Thomas</first><last>Arnold</last></author>
    <booktitle>Proceedings of the 10th Web as Corpus Workshop</booktitle>
    <month>August</month>
    <year>2016</year>
    <address>Berlin</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>106&#8211;114</pages>
    <url>http://anthology.aclweb.org/W16-2613</url>
    <bibtype>inproceedings</bibtype>
    <bibkey>remus-EtAl:2016:WAC-X</bibkey>
  </paper>

  <paper id="2614">
    <title>bot.zen @ EmpiriST 2015 - A minimally-deep learning PoS-tagger (trained for German CMC and Web data)</title>
    <author><first>Egon</first><last>Stemle</last></author>
    <booktitle>Proceedings of the 10th Web as Corpus Workshop</booktitle>
    <month>August</month>
    <year>2016</year>
    <address>Berlin</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>115&#8211;119</pages>
    <url>http://anthology.aclweb.org/W16-2614</url>
    <bibtype>inproceedings</bibtype>
    <bibkey>stemle:2016:WAC-X</bibkey>
  </paper>

  <paper id="2615">
    <title>LTL-UDE @ EmpiriST 2015: Tokenization and PoS Tagging of Social Media Text</title>
    <author><first>Tobias</first><last>Horsmann</last></author>
    <author><first>Torsten</first><last>Zesch</last></author>
    <booktitle>Proceedings of the 10th Web as Corpus Workshop</booktitle>
    <month>August</month>
    <year>2016</year>
    <address>Berlin</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>120&#8211;126</pages>
    <url>http://anthology.aclweb.org/W16-2615</url>
    <bibtype>inproceedings</bibtype>
    <bibkey>horsmann-zesch:2016:WAC-X</bibkey>
  </paper>

</volume>

