% LREC 2016 paper introducing the CodE Alltag German e-mail corpus (see url field).
% Braces in title/booktitle protect casing from sentence-casing styles; month uses the standard macro.
@inproceedings{L16-1404,
  author    = {Krieg-Holz, Ulrike and Schuschnig, Christian and Matthies, Franz and Redling, Benjamin and Hahn, Udo},
  title     = {{CodE Alltag}: A {German}-Language {E-Mail} Corpus},
  booktitle = {Proceedings of the Tenth International Conference on Language Resources and Evaluation ({LREC} 2016)},
  publisher = {European Language Resources Association (ELRA)},
  address   = {Portoro{\v{z}}, Slovenia},
  month     = may,
  year      = {2016},
  pages     = {2543--2550},
  url       = {https://www.aclweb.org/anthology/L16-1404},
  abstract  = {We introduce CODE ALLTAG, a text corpus composed of German-language e-mails. It is divided into two partitions: the first of these portions, CODE ALLTAG\_XL, consists of a bulk-size collection drawn from an openly accessible e-mail archive (roughly 1.5M e-mails), whereas the second portion, CODE ALLTAG\_S+d, is much smaller in size (less than thousand e-mails), yet excels with demographic data from each author of an e-mail. CODE ALLTAG, thus, currently constitutes the largest E-Mail corpus ever built. In this paper, we describe, for both parts, the solicitation process for gathering e-mails, present descriptive statistical properties of the corpus, and, for CODE ALLTAG\_S+d, reveal a compilation of demographic features of the donors of e-mails.},
}

