@inproceedings{brummer-etal-2016-dbpedia,
title = "{DB}pedia Abstracts: A Large-Scale, Open, Multilingual {NLP} Training Corpus",
author = {Br{\"u}mmer, Martin and
Dojchinovski, Milan and
Hellmann, Sebastian},
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Declerck, Thierry and
Goggi, Sara and
Grobelnik, Marko and
Maegaard, Bente and
Mariani, Joseph and
Mazo, Helene and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Tenth International Conference on Language Resources and Evaluation ({LREC}'16)",
month = may,
year = "2016",
address = "Portoro{\v{z}}, Slovenia",
publisher = "European Language Resources Association (ELRA)",
url = "https://aclanthology.org/L16-1532",
pages = "3339--3343",
abstract = "The ever increasing importance of machine learning in Natural Language Processing is accompanied by an equally increasing need in large-scale training and evaluation corpora. Due to its size, its openness and relative quality, the Wikipedia has already been a source of such data, but on a limited scale. This paper introduces the DBpedia Abstract Corpus, a large-scale, open corpus of annotated Wikipedia texts in six languages, featuring over 11 million texts and over 97 million entity links. The properties of the Wikipedia texts are being described, as well as the corpus creation process, its format and interesting use-cases, like Named Entity Linking training and evaluation.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="brummer-etal-2016-dbpedia">
<titleInfo>
<title>DBpedia Abstracts: A Large-Scale, Open, Multilingual NLP Training Corpus</title>
</titleInfo>
<name type="personal">
<namePart type="given">Martin</namePart>
<namePart type="family">Brümmer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Milan</namePart>
<namePart type="family">Dojchinovski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastian</namePart>
<namePart type="family">Hellmann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2016-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC’16)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Goggi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marko</namePart>
<namePart type="family">Grobelnik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Helene</namePart>
<namePart type="family">Mazo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Portorož, Slovenia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The ever increasing importance of machine learning in Natural Language Processing is accompanied by an equally increasing need in large-scale training and evaluation corpora. Due to its size, its openness and relative quality, the Wikipedia has already been a source of such data, but on a limited scale. This paper introduces the DBpedia Abstract Corpus, a large-scale, open corpus of annotated Wikipedia texts in six languages, featuring over 11 million texts and over 97 million entity links. The properties of the Wikipedia texts are being described, as well as the corpus creation process, its format and interesting use-cases, like Named Entity Linking training and evaluation.</abstract>
<identifier type="citekey">brummer-etal-2016-dbpedia</identifier>
<location>
<url>https://aclanthology.org/L16-1532</url>
</location>
<part>
<date>2016-05</date>
<extent unit="page">
<start>3339</start>
<end>3343</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T DBpedia Abstracts: A Large-Scale, Open, Multilingual NLP Training Corpus
%A Brümmer, Martin
%A Dojchinovski, Milan
%A Hellmann, Sebastian
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Declerck, Thierry
%Y Goggi, Sara
%Y Grobelnik, Marko
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Mazo, Helene
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC’16)
%D 2016
%8 May
%I European Language Resources Association (ELRA)
%C Portorož, Slovenia
%F brummer-etal-2016-dbpedia
%X The ever increasing importance of machine learning in Natural Language Processing is accompanied by an equally increasing need in large-scale training and evaluation corpora. Due to its size, its openness and relative quality, the Wikipedia has already been a source of such data, but on a limited scale. This paper introduces the DBpedia Abstract Corpus, a large-scale, open corpus of annotated Wikipedia texts in six languages, featuring over 11 million texts and over 97 million entity links. The properties of the Wikipedia texts are being described, as well as the corpus creation process, its format and interesting use-cases, like Named Entity Linking training and evaluation.
%U https://aclanthology.org/L16-1532
%P 3339-3343
Markdown (Informal)
[DBpedia Abstracts: A Large-Scale, Open, Multilingual NLP Training Corpus](https://aclanthology.org/L16-1532) (Brümmer et al., LREC 2016)
ACL