% LREC 2012 conference paper (ACL Anthology identifier L12-1624).
% Names use the "Last, First" form; accented letters are wrapped as BibTeX
% special characters ({\v{s}}, {\u{g}}, ...) so sorting and labels work.
% Proper nouns in the title are braced to survive sentence-casing styles.
@inproceedings{POMIKLEK12.1047.L12-1624,
    author    = {Pomik{\'a}lek, Jan and Jakub{\'\i}{\v{c}}ek, Milo{\v{s}} and Rychl{\'y}, Pavel},
    title     = {Building a 70 billion word corpus of {English} from {ClueWeb}},
    booktitle = {Proceedings of the Eighth International Conference on Language Resources and Evaluation ({LREC-2012})},
    editor    = {Calzolari, Nicoletta and Choukri, Khalid and Declerck, Thierry and Do{\u{g}}an, Mehmet U{\u{g}}ur and Maegaard, Bente and Mariani, Joseph and Odijk, Jan and Piperidis, Stelios},
    year      = {2012},
    month     = may,
    address   = {Istanbul, Turkey},
    publisher = {European Language Resources Association (ELRA)},
    pages     = {502--506},
    isbn      = {978-2-9517408-7-7},
    language  = {English},
    url       = {http://www.lrec-conf.org/proceedings/lrec2012/pdf/1047_Paper.pdf},
    note      = {ACL Anthology Identifier: L12-1624},
}
