@inproceedings{petri-cohn-2016-succinct,
title = "Succinct Data Structures for {NLP}-at-Scale",
author = "Petri, Matthias and
Cohn, Trevor",
editor = "Federico, Marcello and
Aizawa, Akiko",
booktitle = "Proceedings of {COLING} 2016, the 26th International Conference on Computational Linguistics: Tutorial Abstracts",
month = dec,
year = "2016",
address = "Osaka, Japan",
publisher = "The COLING 2016 Organizing Committee",
url = "https://aclanthology.org/C16-3006/",
pages = "20--21",
abstract = "Succinct data structures involve the use of novel data structures, compression technologies, and other mechanisms to allow data to be stored in extremely small memory or disk footprints, while still allowing for efficient access to the underlying data. They have successfully been applied in areas such as Information Retrieval and Bioinformatics to create highly compressible in-memory search indexes which provide efficient search functionality over datasets which traditionally could only be processed using external memory data structures. Modern technologies in this space are not well known within the NLP community, but have the potential to revolutionise NLP, particularly the application to {\textquoteleft}big data' in the form of terabyte and larger corpora. This tutorial will present a practical introduction to the most important succinct data structures, tools, and applications with the intent of providing the researchers with a jump-start into this domain. The focus of this tutorial will be efficient text processing utilising space efficient representations of suffix arrays, suffix trees and searchable integer compression schemes with specific applications of succinct data structures to common NLP tasks such as $n$-gram language modelling."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="petri-cohn-2016-succinct">
<titleInfo>
<title>Succinct Data Structures for NLP-at-Scale</title>
</titleInfo>
<name type="personal">
<namePart type="given">Matthias</namePart>
<namePart type="family">Petri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Trevor</namePart>
<namePart type="family">Cohn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2016-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Tutorial Abstracts</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marcello</namePart>
<namePart type="family">Federico</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Akiko</namePart>
<namePart type="family">Aizawa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>The COLING 2016 Organizing Committee</publisher>
<place>
<placeTerm type="text">Osaka, Japan</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Succinct data structures involve the use of novel data structures, compression technologies, and other mechanisms to allow data to be stored in extremely small memory or disk footprints, while still allowing for efficient access to the underlying data. They have successfully been applied in areas such as Information Retrieval and Bioinformatics to create highly compressible in-memory search indexes which provide efficient search functionality over datasets which traditionally could only be processed using external memory data structures. Modern technologies in this space are not well known within the NLP community, but have the potential to revolutionise NLP, particularly the application to ‘big data’ in the form of terabyte and larger corpora. This tutorial will present a practical introduction to the most important succinct data structures, tools, and applications with the intent of providing the researchers with a jump-start into this domain. The focus of this tutorial will be efficient text processing utilising space efficient representations of suffix arrays, suffix trees and searchable integer compression schemes with specific applications of succinct data structures to common NLP tasks such as n-gram language modelling.</abstract>
<identifier type="citekey">petri-cohn-2016-succinct</identifier>
<location>
<url>https://aclanthology.org/C16-3006/</url>
</location>
<part>
<date>2016-12</date>
<extent unit="page">
<start>20</start>
<end>21</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Succinct Data Structures for NLP-at-Scale
%A Petri, Matthias
%A Cohn, Trevor
%Y Federico, Marcello
%Y Aizawa, Akiko
%S Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Tutorial Abstracts
%D 2016
%8 December
%I The COLING 2016 Organizing Committee
%C Osaka, Japan
%F petri-cohn-2016-succinct
%X Succinct data structures involve the use of novel data structures, compression technologies, and other mechanisms to allow data to be stored in extremely small memory or disk footprints, while still allowing for efficient access to the underlying data. They have successfully been applied in areas such as Information Retrieval and Bioinformatics to create highly compressible in-memory search indexes which provide efficient search functionality over datasets which traditionally could only be processed using external memory data structures. Modern technologies in this space are not well known within the NLP community, but have the potential to revolutionise NLP, particularly the application to ‘big data’ in the form of terabyte and larger corpora. This tutorial will present a practical introduction to the most important succinct data structures, tools, and applications with the intent of providing the researchers with a jump-start into this domain. The focus of this tutorial will be efficient text processing utilising space efficient representations of suffix arrays, suffix trees and searchable integer compression schemes with specific applications of succinct data structures to common NLP tasks such as n-gram language modelling.
%U https://aclanthology.org/C16-3006/
%P 20-21
Markdown (Informal)
[Succinct Data Structures for NLP-at-Scale](https://aclanthology.org/C16-3006/) (Petri & Cohn, COLING 2016)
ACL
- Matthias Petri and Trevor Cohn. 2016. Succinct Data Structures for NLP-at-Scale. In Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Tutorial Abstracts, pages 20–21, Osaka, Japan. The COLING 2016 Organizing Committee.