@inproceedings{klang-nugues-2019-docria,
title = "{D}ocria: Processing and Storing Linguistic Data with {W}ikipedia",
author = "Klang, Marcus and
Nugues, Pierre",
editor = "Hartmann, Mareike and
Plank, Barbara",
booktitle = "Proceedings of the 22nd Nordic Conference on Computational Linguistics",
month = sep # "{--}" # oct,
year = "2019",
address = "Turku, Finland",
publisher = {Link{\"o}ping University Electronic Press},
url = "https://aclanthology.org/W19-6148",
pages = "400--405",
abstract = "The availability of user-generated content has increased significantly over time. Wikipedia is one example of a corpora which spans a huge range of topics and is freely available. Storing and processing these corpora requires flexible documents models as they may contain malicious and incorrect data. Docria is a library which attempts to address this issue by providing a solution which can be used with small to large corpora, from laptops using Python interactively in a Jupyter notebook to clusters running map-reduce frameworks with optimized compiled code. Docria is available as open-source code.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="klang-nugues-2019-docria">
<titleInfo>
<title>Docria: Processing and Storing Linguistic Data with Wikipedia</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marcus</namePart>
<namePart type="family">Klang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pierre</namePart>
<namePart type="family">Nugues</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-sep–oct</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 22nd Nordic Conference on Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mareike</namePart>
<namePart type="family">Hartmann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Barbara</namePart>
<namePart type="family">Plank</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Linköping University Electronic Press</publisher>
<place>
<placeTerm type="text">Turku, Finland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The availability of user-generated content has increased significantly over time. Wikipedia is one example of a corpora which spans a huge range of topics and is freely available. Storing and processing these corpora requires flexible documents models as they may contain malicious and incorrect data. Docria is a library which attempts to address this issue by providing a solution which can be used with small to large corpora, from laptops using Python interactively in a Jupyter notebook to clusters running map-reduce frameworks with optimized compiled code. Docria is available as open-source code.</abstract>
<identifier type="citekey">klang-nugues-2019-docria</identifier>
<location>
<url>https://aclanthology.org/W19-6148</url>
</location>
<part>
<date>2019-sep–oct</date>
<extent unit="page">
<start>400</start>
<end>405</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Docria: Processing and Storing Linguistic Data with Wikipedia
%A Klang, Marcus
%A Nugues, Pierre
%Y Hartmann, Mareike
%Y Plank, Barbara
%S Proceedings of the 22nd Nordic Conference on Computational Linguistics
%D 2019
%8 sep–oct
%I Linköping University Electronic Press
%C Turku, Finland
%F klang-nugues-2019-docria
%X The availability of user-generated content has increased significantly over time. Wikipedia is one example of a corpora which spans a huge range of topics and is freely available. Storing and processing these corpora requires flexible documents models as they may contain malicious and incorrect data. Docria is a library which attempts to address this issue by providing a solution which can be used with small to large corpora, from laptops using Python interactively in a Jupyter notebook to clusters running map-reduce frameworks with optimized compiled code. Docria is available as open-source code.
%U https://aclanthology.org/W19-6148
%P 400-405
Markdown (Informal)
[Docria: Processing and Storing Linguistic Data with Wikipedia](https://aclanthology.org/W19-6148) (Klang & Nugues, NoDaLiDa 2019)
ACL