% Workshop paper in the proceedings of the 9th Workshop on Linked Data in Linguistics.
% Every person is written as "Family, Given". Multi-word family names are kept
% whole before the comma so that BibTeX treats them as a single surname.
@inproceedings{stankovic-etal-2024-towards,
  title     = {Towards Semantic Interoperability: Parallel Corpora as Linked Data Incorporating Named Entity Linking},
  author    = {Stankovi{\'c}, Ranka and
               Ikoni{\'c} Ne{\v{s}}i{\'c}, Milica and
               Perisic, Olja and
               {\v{S}}kori{\'c}, Mihailo and
               Kitanovi{\'c}, Olivera},
  editor    = {Chiarcos, Christian and
               Gkirtzou, Katerina and
               Ionov, Maxim and
               Khan, Fahad and
               McCrae, John P. and
               Montiel Ponsoda, Elena and
               Mart{\'\i}n Chozas, Patricia},
  booktitle = {Proceedings of the 9th Workshop on Linked Data in Linguistics @ {LREC-COLING} 2024},
  month     = may,
  year      = {2024},
  address   = {Torino, Italia},
  publisher = {ELRA and ICCL},
  url       = {https://aclanthology.org/2024.ldl-1.15},
  pages     = {115--125},
  abstract  = {The paper presents the results of the research related to the preparation of parallel corpora, focusing on transformation into RDF graphs using NLP Interchange Format (NIF) for linguistic annotation. We give an overview of the parallel corpus that was used in this case study, as well as the process of POS tagging, lemmatization, named entity recognition (NER), and named entity linking (NEL), which is implemented using Wikidata. In the first phase of NEL main characters and places mentioned in novels are stored in Wikidata and in the second phase they are linked with the occurrences of previously annotated entities in text. Next, we describe the named entity linking (NEL), data conversion to RDF, and incorporation of NIF annotations. Produced NIF files were evaluated through the exploration of triplestore using SPARQL queries. Finally, the bridging of Linked Data and Digital Humanities research is discussed, as well as some drawbacks related to the verbosity of transformation. Semantic interoperability concept in the context of linked data and parallel corpora ensures that data exchanged between systems carries shared and well-defined meanings, enabling effective communication and understanding.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!--
  MODS record for the workshop paper "stankovic-etal-2024-towards".
  The top-level name elements are the paper's authors; the relatedItem of
  type "host" describes the proceedings volume together with its editors.
  Multi-word family names are stored whole in the "family" namePart.
-->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="stankovic-etal-2024-towards">
    <titleInfo>
      <title>Towards Semantic Interoperability: Parallel Corpora as Linked Data Incorporating Named Entity Linking</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Ranka</namePart>
      <namePart type="family">Stanković</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Milica</namePart>
      <namePart type="family">Ikonić Nešić</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Olja</namePart>
      <namePart type="family">Perisic</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Mihailo</namePart>
      <namePart type="family">Škorić</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Olivera</namePart>
      <namePart type="family">Kitanović</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume and its editors. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 9th Workshop on Linked Data in Linguistics @ LREC-COLING 2024</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Christian</namePart>
        <namePart type="family">Chiarcos</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Katerina</namePart>
        <namePart type="family">Gkirtzou</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Maxim</namePart>
        <namePart type="family">Ionov</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Fahad</namePart>
        <namePart type="family">Khan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">John</namePart>
        <namePart type="given">P</namePart>
        <namePart type="family">McCrae</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Elena</namePart>
        <namePart type="family">Montiel Ponsoda</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Patricia</namePart>
        <namePart type="family">Martín Chozas</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>ELRA and ICCL</publisher>
        <place>
          <placeTerm type="text">Torino, Italia</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>The paper presents the results of the research related to the preparation of parallel corpora, focusing on transformation into RDF graphs using NLP Interchange Format (NIF) for linguistic annotation. We give an overview of the parallel corpus that was used in this case study, as well as the process of POS tagging, lemmatization, named entity recognition (NER), and named entity linking (NEL), which is implemented using Wikidata. In the first phase of NEL main characters and places mentioned in novels are stored in Wikidata and in the second phase they are linked with the occurrences of previously annotated entities in text. Next, we describe the named entity linking (NEL), data conversion to RDF, and incorporation of NIF annotations. Produced NIF files were evaluated through the exploration of triplestore using SPARQL queries. Finally, the bridging of Linked Data and Digital Humanities research is discussed, as well as some drawbacks related to the verbosity of transformation. Semantic interoperability concept in the context of linked data and parallel corpora ensures that data exchanged between systems carries shared and well-defined meanings, enabling effective communication and understanding.</abstract>
    <identifier type="citekey">stankovic-etal-2024-towards</identifier>
    <location>
      <url>https://aclanthology.org/2024.ldl-1.15</url>
    </location>
    <!-- Position of the paper inside the host volume. -->
    <part>
      <date>2024-05</date>
      <extent unit="page">
        <start>115</start>
        <end>125</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Towards Semantic Interoperability: Parallel Corpora as Linked Data Incorporating Named Entity Linking
%A Stanković, Ranka
%A Ikonić Nešić, Milica
%A Perisic, Olja
%A Škorić, Mihailo
%A Kitanović, Olivera
%Y Chiarcos, Christian
%Y Gkirtzou, Katerina
%Y Ionov, Maxim
%Y Khan, Fahad
%Y McCrae, John P.
%Y Montiel Ponsoda, Elena
%Y Martín Chozas, Patricia
%S Proceedings of the 9th Workshop on Linked Data in Linguistics @ LREC-COLING 2024
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F stankovic-etal-2024-towards
%X The paper presents the results of the research related to the preparation of parallel corpora, focusing on transformation into RDF graphs using NLP Interchange Format (NIF) for linguistic annotation. We give an overview of the parallel corpus that was used in this case study, as well as the process of POS tagging, lemmatization, named entity recognition (NER), and named entity linking (NEL), which is implemented using Wikidata. In the first phase of NEL main characters and places mentioned in novels are stored in Wikidata and in the second phase they are linked with the occurrences of previously annotated entities in text. Next, we describe the named entity linking (NEL), data conversion to RDF, and incorporation of NIF annotations. Produced NIF files were evaluated through the exploration of triplestore using SPARQL queries. Finally, the bridging of Linked Data and Digital Humanities research is discussed, as well as some drawbacks related to the verbosity of transformation. Semantic interoperability concept in the context of linked data and parallel corpora ensures that data exchanged between systems carries shared and well-defined meanings, enabling effective communication and understanding.
%U https://aclanthology.org/2024.ldl-1.15
%P 115-125
Markdown (Informal)
[Towards Semantic Interoperability: Parallel Corpora as Linked Data Incorporating Named Entity Linking](https://aclanthology.org/2024.ldl-1.15) (Stanković et al., LDL-WS 2024)
ACL