% LREC 2016 conference paper; the citation key is the ACL Anthology identifier.
% Note: `address` holds the conference location, as exported by the Anthology.
@inproceedings{L16-1321,
  author    = {Burghardt, Manuel and Granvogl, Daniel and Wolff, Christian},
  title     = {Creating a Lexicon of {Bavarian} Dialect by Means of {Facebook} Language Data and Crowdsourcing},
  booktitle = {Proceedings of the Tenth International Conference on Language Resources and Evaluation ({LREC} 2016)},
  year      = {2016},
  month     = may,
  address   = {Portoro{\v{z}}, Slovenia},
  publisher = {European Language Resources Association (ELRA)},
  pages     = {2029--2033},
  url       = {https://www.aclweb.org/anthology/L16-1321},
  abstract  = {Data acquisition in dialectology is typically a tedious task, as dialect samples of spoken language have to be collected via questionnaires or interviews. In this article, we suggest to use the ``web as a corpus'' approach for dialectology. We present a case study that demonstrates how authentic language data for the Bavarian dialect (ISO 639-3:bar) can be collected automatically from the social network Facebook. We also show that Facebook can be used effectively as a crowdsourcing platform, where users are willing to translate dialect words collaboratively in order to create a common lexicon of their Bavarian dialect. Key insights from the case study are summarized as ``lessons learned'', together with suggestions for future enhancements of the lexicon creation approach.},
}

