@inproceedings{ikonic-nesic-etal-2022-eltec,
title = "From {ELT}e{C} Text Collection Metadata and Named Entities to Linked-data (and Back)",
author = {Ikoni{\'c} Ne{\v{s}}i{\'c}, Milica and
Stankovi{\'c}, Ranka and
Sch{\"o}ch, Christof and
Skoric, Mihailo},
booktitle = "Proceedings of the 8th Workshop on Linked Data in Linguistics within the 13th Language Resources and Evaluation Conference",
month = jun,
year = "2022",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2022.ldl-1.2",
pages = "7--16",
abstract = "In this paper we present the wikification of the ELTeC (European Literary Text Collection), developed within the COST Action {``}Distant Reading for European Literary History{''} (CA16204). ELTeC is a multilingual corpus of novels written in the time period 1840{---}1920, built to apply distant reading methods and tools to explore the European literary history. We present the pipeline that led to the production of the linked dataset, the novels{'} metadata retrieval and named entity recognition, transformation, mapping and Wikidata population, followed by named entity linking and export to NIF (NLP Interchange Format). The speeding up of the process of data preparation and import to Wikidata is presented on the use case of seven sub-collections of ELTeC (English, Portuguese, French, Slovenian, German, Hungarian and Serbian). Our goal was to automate the process of preparing and importing information, so OpenRefine and QuickStatements were chosen as the best options. The paper also includes examples of SPARQL queries for retrieval of authors, novel titles, publication places and other metadata with different visualisation options as well as statistical overviews.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ikonic-nesic-etal-2022-eltec">
<titleInfo>
<title>From ELTeC Text Collection Metadata and Named Entities to Linked-data (and Back)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Milica</namePart>
<namePart type="family">Ikonić Nešić</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ranka</namePart>
<namePart type="family">Stanković</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christof</namePart>
<namePart type="family">Schöch</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mihailo</namePart>
<namePart type="family">Skoric</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 8th Workshop on Linked Data in Linguistics within the 13th Language Resources and Evaluation Conference</title>
</titleInfo>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper we present the wikification of the ELTeC (European Literary Text Collection), developed within the COST Action “Distant Reading for European Literary History” (CA16204). ELTeC is a multilingual corpus of novels written in the time period 1840—1920, built to apply distant reading methods and tools to explore the European literary history. We present the pipeline that led to the production of the linked dataset, the novels’ metadata retrieval and named entity recognition, transformation, mapping and Wikidata population, followed by named entity linking and export to NIF (NLP Interchange Format). The speeding up of the process of data preparation and import to Wikidata is presented on the use case of seven sub-collections of ELTeC (English, Portuguese, French, Slovenian, German, Hungarian and Serbian). Our goal was to automate the process of preparing and importing information, so OpenRefine and QuickStatements were chosen as the best options. The paper also includes examples of SPARQL queries for retrieval of authors, novel titles, publication places and other metadata with different visualisation options as well as statistical overviews.</abstract>
<identifier type="citekey">ikonic-nesic-etal-2022-eltec</identifier>
<location>
<url>https://aclanthology.org/2022.ldl-1.2</url>
</location>
<part>
<date>2022-06</date>
<extent unit="page">
<start>7</start>
<end>16</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T From ELTeC Text Collection Metadata and Named Entities to Linked-data (and Back)
%A Ikonić Nešić, Milica
%A Stanković, Ranka
%A Schöch, Christof
%A Skoric, Mihailo
%S Proceedings of the 8th Workshop on Linked Data in Linguistics within the 13th Language Resources and Evaluation Conference
%D 2022
%8 June
%I European Language Resources Association
%C Marseille, France
%F ikonic-nesic-etal-2022-eltec
%X In this paper we present the wikification of the ELTeC (European Literary Text Collection), developed within the COST Action “Distant Reading for European Literary History” (CA16204). ELTeC is a multilingual corpus of novels written in the time period 1840—1920, built to apply distant reading methods and tools to explore the European literary history. We present the pipeline that led to the production of the linked dataset, the novels’ metadata retrieval and named entity recognition, transformation, mapping and Wikidata population, followed by named entity linking and export to NIF (NLP Interchange Format). The speeding up of the process of data preparation and import to Wikidata is presented on the use case of seven sub-collections of ELTeC (English, Portuguese, French, Slovenian, German, Hungarian and Serbian). Our goal was to automate the process of preparing and importing information, so OpenRefine and QuickStatements were chosen as the best options. The paper also includes examples of SPARQL queries for retrieval of authors, novel titles, publication places and other metadata with different visualisation options as well as statistical overviews.
%U https://aclanthology.org/2022.ldl-1.2
%P 7-16
Markdown (Informal)
[From ELTeC Text Collection Metadata and Named Entities to Linked-data (and Back)](https://aclanthology.org/2022.ldl-1.2) (Ikonić Nešić et al., LDL 2022)
ACL