@inproceedings{loffler-etal-2020-tag,
title = "Tag Me If You Can! Semantic Annotation of Biodiversity Metadata with the {QEMP} Corpus and the {B}iodiv{T}agger",
author = {L{\"o}ffler, Felicitas and
Abdelmageed, Nora and
Babalou, Samira and
Kaur, Pawandeep and
K{\"o}nig-Ries, Birgitta},
editor = "Calzolari, Nicoletta and
B{\'e}chet, Fr{\'e}d{\'e}ric and
Blache, Philippe and
Choukri, Khalid and
Cieri, Christopher and
Declerck, Thierry and
Goggi, Sara and
Isahara, Hitoshi and
Maegaard, Bente and
Mariani, Joseph and
Mazo, H{\'e}l{\`e}ne and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Twelfth Language Resources and Evaluation Conference",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2020.lrec-1.560",
pages = "4557--4564",
abstract = "Dataset Retrieval is gaining importance due to a large amount of research data and the great demand for reusing scientific data. Dataset Retrieval is mostly based on metadata, structured information about the primary data. Enriching these metadata with semantic annotations based on Linked Open Data (LOD) enables datasets, publications and authors to be connected and expands the search on semantically related terms. In this work, we introduce the BiodivTagger, an ontology-based Information Extraction pipeline, developed for metadata from biodiversity research. The system recognizes biological, physical and chemical processes, environmental terms, data parameters and phenotypes as well as materials and chemical compounds and links them to concepts in dedicated ontologies. To evaluate our pipeline, we created a gold standard of 50 metadata files (QEMP corpus) selected from five different data repositories in biodiversity research. To the best of our knowledge, this is the first annotated metadata corpus for biodiversity research data. The results reveal a mixed picture. While materials and data parameters are properly matched to ontological concepts in most cases, some ontological issues occurred for processes and environmental terms.",
language = "English",
ISBN = "979-10-95546-34-4",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="loffler-etal-2020-tag">
<titleInfo>
<title>Tag Me If You Can! Semantic Annotation of Biodiversity Metadata with the QEMP Corpus and the BiodivTagger</title>
</titleInfo>
<name type="personal">
<namePart type="given">Felicitas</namePart>
<namePart type="family">Löffler</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nora</namePart>
<namePart type="family">Abdelmageed</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Samira</namePart>
<namePart type="family">Babalou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pawandeep</namePart>
<namePart type="family">Kaur</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Birgitta</namePart>
<namePart type="family">König-Ries</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Twelfth Language Resources and Evaluation Conference</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Frédéric</namePart>
<namePart type="family">Béchet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philippe</namePart>
<namePart type="family">Blache</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christopher</namePart>
<namePart type="family">Cieri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Goggi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hitoshi</namePart>
<namePart type="family">Isahara</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hélène</namePart>
<namePart type="family">Mazo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-34-4</identifier>
</relatedItem>
<abstract>Dataset Retrieval is gaining importance due to a large amount of research data and the great demand for reusing scientific data. Dataset Retrieval is mostly based on metadata, structured information about the primary data. Enriching these metadata with semantic annotations based on Linked Open Data (LOD) enables datasets, publications and authors to be connected and expands the search on semantically related terms. In this work, we introduce the BiodivTagger, an ontology-based Information Extraction pipeline, developed for metadata from biodiversity research. The system recognizes biological, physical and chemical processes, environmental terms, data parameters and phenotypes as well as materials and chemical compounds and links them to concepts in dedicated ontologies. To evaluate our pipeline, we created a gold standard of 50 metadata files (QEMP corpus) selected from five different data repositories in biodiversity research. To the best of our knowledge, this is the first annotated metadata corpus for biodiversity research data. The results reveal a mixed picture. While materials and data parameters are properly matched to ontological concepts in most cases, some ontological issues occurred for processes and environmental terms.</abstract>
<identifier type="citekey">loffler-etal-2020-tag</identifier>
<location>
<url>https://aclanthology.org/2020.lrec-1.560</url>
</location>
<part>
<date>2020-05</date>
<extent unit="page">
<start>4557</start>
<end>4564</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Tag Me If You Can! Semantic Annotation of Biodiversity Metadata with the QEMP Corpus and the BiodivTagger
%A Löffler, Felicitas
%A Abdelmageed, Nora
%A Babalou, Samira
%A Kaur, Pawandeep
%A König-Ries, Birgitta
%Y Calzolari, Nicoletta
%Y Béchet, Frédéric
%Y Blache, Philippe
%Y Choukri, Khalid
%Y Cieri, Christopher
%Y Declerck, Thierry
%Y Goggi, Sara
%Y Isahara, Hitoshi
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Mazo, Hélène
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Twelfth Language Resources and Evaluation Conference
%D 2020
%8 May
%I European Language Resources Association
%C Marseille, France
%@ 979-10-95546-34-4
%G English
%F loffler-etal-2020-tag
%X Dataset Retrieval is gaining importance due to a large amount of research data and the great demand for reusing scientific data. Dataset Retrieval is mostly based on metadata, structured information about the primary data. Enriching these metadata with semantic annotations based on Linked Open Data (LOD) enables datasets, publications and authors to be connected and expands the search on semantically related terms. In this work, we introduce the BiodivTagger, an ontology-based Information Extraction pipeline, developed for metadata from biodiversity research. The system recognizes biological, physical and chemical processes, environmental terms, data parameters and phenotypes as well as materials and chemical compounds and links them to concepts in dedicated ontologies. To evaluate our pipeline, we created a gold standard of 50 metadata files (QEMP corpus) selected from five different data repositories in biodiversity research. To the best of our knowledge, this is the first annotated metadata corpus for biodiversity research data. The results reveal a mixed picture. While materials and data parameters are properly matched to ontological concepts in most cases, some ontological issues occurred for processes and environmental terms.
%U https://aclanthology.org/2020.lrec-1.560
%P 4557-4564
Markdown (Informal)
[Tag Me If You Can! Semantic Annotation of Biodiversity Metadata with the QEMP Corpus and the BiodivTagger](https://aclanthology.org/2020.lrec-1.560) (Löffler et al., LREC 2020)
ACL