@inproceedings{frei-kramer-2024-creating,
title = "Creating Ontology-annotated Corpora from {W}ikipedia for Medical Named-entity Recognition",
author = "Frei, Johann and
Kramer, Frank",
editor = "Demner-Fushman, Dina and
Ananiadou, Sophia and
Miwa, Makoto and
Roberts, Kirk and
Tsujii, Junichi",
booktitle = "Proceedings of the 23rd Workshop on Biomedical Natural Language Processing",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.bionlp-1.47",
doi = "10.18653/v1/2024.bionlp-1.47",
pages = "570--579",
abstract = "Acquiring annotated corpora for medical NLP is challenging due to legal and privacy constraints and costly annotation efforts, and using annotated public datasets may not align well to the desired target application in terms of annotation style or language. We investigate the approach of utilizing Wikipedia and WikiData jointly to acquire an unsupervised annotated corpus for named-entity recognition (NER). By controlling the annotation ruleset through WikiData{'}s ontology, we extract custom-defined annotations and dynamically impute weak annotations by an adaptive loss scaling. Our validation on German medication detection datasets yields competitive results. The entire pipeline only relies on open models and data resources, enabling reproducibility and open sharing of models and corpora. All relevant assets are shared on GitHub.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="frei-kramer-2024-creating">
<titleInfo>
<title>Creating Ontology-annotated Corpora from Wikipedia for Medical Named-entity Recognition</title>
</titleInfo>
<name type="personal">
<namePart type="given">Johann</namePart>
<namePart type="family">Frei</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Frank</namePart>
<namePart type="family">Kramer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 23rd Workshop on Biomedical Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dina</namePart>
<namePart type="family">Demner-Fushman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sophia</namePart>
<namePart type="family">Ananiadou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Makoto</namePart>
<namePart type="family">Miwa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kirk</namePart>
<namePart type="family">Roberts</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junichi</namePart>
<namePart type="family">Tsujii</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Acquiring annotated corpora for medical NLP is challenging due to legal and privacy constraints and costly annotation efforts, and using annotated public datasets may not align well to the desired target application in terms of annotation style or language. We investigate the approach of utilizing Wikipedia and WikiData jointly to acquire an unsupervised annotated corpus for named-entity recognition (NER). By controlling the annotation ruleset through WikiData’s ontology, we extract custom-defined annotations and dynamically impute weak annotations by an adaptive loss scaling. Our validation on German medication detection datasets yields competitive results. The entire pipeline only relies on open models and data resources, enabling reproducibility and open sharing of models and corpora. All relevant assets are shared on GitHub.</abstract>
<identifier type="citekey">frei-kramer-2024-creating</identifier>
<identifier type="doi">10.18653/v1/2024.bionlp-1.47</identifier>
<location>
<url>https://aclanthology.org/2024.bionlp-1.47</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>570</start>
<end>579</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Creating Ontology-annotated Corpora from Wikipedia for Medical Named-entity Recognition
%A Frei, Johann
%A Kramer, Frank
%Y Demner-Fushman, Dina
%Y Ananiadou, Sophia
%Y Miwa, Makoto
%Y Roberts, Kirk
%Y Tsujii, Junichi
%S Proceedings of the 23rd Workshop on Biomedical Natural Language Processing
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F frei-kramer-2024-creating
%X Acquiring annotated corpora for medical NLP is challenging due to legal and privacy constraints and costly annotation efforts, and using annotated public datasets may not align well to the desired target application in terms of annotation style or language. We investigate the approach of utilizing Wikipedia and WikiData jointly to acquire an unsupervised annotated corpus for named-entity recognition (NER). By controlling the annotation ruleset through WikiData’s ontology, we extract custom-defined annotations and dynamically impute weak annotations by an adaptive loss scaling. Our validation on German medication detection datasets yields competitive results. The entire pipeline only relies on open models and data resources, enabling reproducibility and open sharing of models and corpora. All relevant assets are shared on GitHub.
%R 10.18653/v1/2024.bionlp-1.47
%U https://aclanthology.org/2024.bionlp-1.47
%U https://doi.org/10.18653/v1/2024.bionlp-1.47
%P 570-579
Markdown (Informal)
[Creating Ontology-annotated Corpora from Wikipedia for Medical Named-entity Recognition](https://aclanthology.org/2024.bionlp-1.47) (Frei & Kramer, BioNLP-WS 2024)
ACL