% LREC 2016 paper: multi-layered repository of code-switched Arabic data (MSA / Egyptian Dialectal Arabic).
@inproceedings{L16-1669,
  author    = {Diab, Mona and Ghoneim, Mahmoud and Hawwari, Abdelati and AlGhamdi, Fahad and AlMarwani, Nada and Al-Badrashiny, Mohamed},
  title     = {Creating a Large Multi-Layered Representational Repository of Linguistic Code Switched {Arabic} Data},
  booktitle = {Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC 2016)},
  publisher = {European Language Resources Association (ELRA)},
  address   = {Portorož, Slovenia},
  month     = may,
  year      = {2016},
  pages     = {4228--4235},
  url       = {https://www.aclweb.org/anthology/L16-1669},
  abstract  = {We present our effort to create a large Multi-Layered representational repository of Linguistic Code-Switched Arabic data. The process involves developing clear annotation standards and Guidelines, streamlining the annotation process, and implementing quality control measures. We used two main protocols for annotation: in-lab gold annotations and crowd sourcing annotations. We developed a web-based annotation tool to facilitate the management of the annotation process. The current version of the repository contains a total of 886,252 tokens that are tagged into one of sixteen code-switching tags. The data exhibits code switching between Modern Standard Arabic and Egyptian Dialectal Arabic representing three data genres: Tweets, commentaries, and discussion fora. The overall Inter-Annotator Agreement is 93.1\%.},
}

