% BibTeX entry for the paper at https://aclanthology.org/2025.ranlp-1.139/
% The MODS XML and EndNote exports of the same record follow this entry.
@inproceedings{suravee-etal-2025-challenge,
  title     = {The Challenge of Performing Ontology-driven Entity Extraction in Real-world Unstructured Textual Data from the Domain of Dementia},
  author    = {Suravee, Sumaiya and
               Schmidt, Carsten Oliver and
               Yordanova, Kristina},
  editor    = {Angelova, Galia and
               Kunilovskaya, Maria and
               Escribe, Marie and
               Mitkov, Ruslan},
  booktitle = {Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative {AI} Era},
  month     = sep,
  year      = {2025},
  address   = {Varna, Bulgaria},
  publisher = {INCOMA Ltd., Shoumen, Bulgaria},
  url       = {https://aclanthology.org/2025.ranlp-1.139/},
  pages     = {1205--1214},
  abstract  = {Named entity recognition allows the automated extraction of structured domain-related information from unstructured textual data. Our study explores the task of ontology-driven entity recognition, a sequence labelling process for custom named entity recognition for the domain of dementia, specifically from unstructured forum texts where unprofessional caregivers of people with dementia discuss the challenges they face related to agitation. The targeted corpus is loosely structured, contains ambiguous sentences and vocabulary that does not match the agitation-related medical vocabulary. To address the above challenges, we propose a pipeline that involves the following steps: 1) development of an annotation codebook; 2) annotation of a textual corpus collected from dementia forums, consisting of 45,216 sentences (775 questions and 5571 answers); 3) data augmentation to reduce the imbalance in the corpus; 4) training of a bidirectional LSTM model and a transformer model; 5) comparison of the results with those from few shot- and zero-shot based prompt engineering techniques using a pretrained large language model (LLaMa 3). The results showed that LLaMa 3 was more robust than traditional neural networks and transformer models in detecting underrepresented entities. Furthermore, the study demonstrates that data augmentation improves the entity recognition task when fine-tuning deep learning models. The paper illustrates the challenges of ontology-driven entity recognition in real-world datasets and proposes a roadmap to addressing them that is potentially transferable to other real-world domains.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS v3 record for citekey suravee-etal-2025-challenge: the paper's own
       metadata, with the proceedings volume described in the "host" relatedItem. -->
  <mods ID="suravee-etal-2025-challenge">
    <titleInfo>
      <title>The Challenge of Performing Ontology-driven Entity Extraction in Real-world Unstructured Textual Data from the Domain of Dementia</title>
    </titleInfo>
    <!-- Authors, in byline order -->
    <name type="personal">
      <namePart type="given">Sumaiya</namePart>
      <namePart type="family">Suravee</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Carsten</namePart>
      <namePart type="given">Oliver</namePart>
      <namePart type="family">Schmidt</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Kristina</namePart>
      <namePart type="family">Yordanova</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-09</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors, publisher and place -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Galia</namePart>
        <namePart type="family">Angelova</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Maria</namePart>
        <namePart type="family">Kunilovskaya</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Marie</namePart>
        <namePart type="family">Escribe</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ruslan</namePart>
        <namePart type="family">Mitkov</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>INCOMA Ltd., Shoumen, Bulgaria</publisher>
        <place>
          <placeTerm type="text">Varna, Bulgaria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Named entity recognition allows the automated extraction of structured domain-related information from unstructured textual data. Our study explores the task of ontology-driven entity recognition, a sequence labelling process for custom named entity recognition for the domain of dementia, specifically from unstructured forum texts where unprofessional caregivers of people with dementia discuss the challenges they face related to agitation. The targeted corpus is loosely structured, contains ambiguous sentences and vocabulary that does not match the agitation-related medical vocabulary. To address the above challenges, we propose a pipeline that involves the following steps: 1) development of an annotation codebook; 2) annotation of a textual corpus collected from dementia forums, consisting of 45,216 sentences (775 questions and 5571 answers); 3) data augmentation to reduce the imbalance in the corpus; 4) training of a bidirectional LSTM model and a transformer model; 5) comparison of the results with those from few shot- and zero-shot based prompt engineering techniques using a pretrained large language model (LLaMa 3). The results showed that LLaMa 3 was more robust than traditional neural networks and transformer models in detecting underrepresented entities. Furthermore, the study demonstrates that data augmentation improves the entity recognition task when fine-tuning deep learning models. The paper illustrates the challenges of ontology-driven entity recognition in real-world datasets and proposes a roadmap to addressing them that is potentially transferable to other real-world domains.</abstract>
    <identifier type="citekey">suravee-etal-2025-challenge</identifier>
    <location>
      <url>https://aclanthology.org/2025.ranlp-1.139/</url>
    </location>
    <!-- Position of the paper within the host volume -->
    <part>
      <date>2025-09</date>
      <extent unit="page">
        <start>1205</start>
        <end>1214</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T The Challenge of Performing Ontology-driven Entity Extraction in Real-world Unstructured Textual Data from the Domain of Dementia
%A Suravee, Sumaiya
%A Schmidt, Carsten Oliver
%A Yordanova, Kristina
%Y Angelova, Galia
%Y Kunilovskaya, Maria
%Y Escribe, Marie
%Y Mitkov, Ruslan
%S Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era
%D 2025
%8 September
%I INCOMA Ltd., Shoumen, Bulgaria
%C Varna, Bulgaria
%F suravee-etal-2025-challenge
%X Named entity recognition allows the automated extraction of structured domain-related information from unstructured textual data. Our study explores the task of ontology-driven entity recognition, a sequence labelling process for custom named entity recognition for the domain of dementia, specifically from unstructured forum texts where unprofessional caregivers of people with dementia discuss the challenges they face related to agitation. The targeted corpus is loosely structured, contains ambiguous sentences and vocabulary that does not match the agitation-related medical vocabulary. To address the above challenges, we propose a pipeline that involves the following steps: 1) development of an annotation codebook; 2) annotation of a textual corpus collected from dementia forums, consisting of 45,216 sentences (775 questions and 5571 answers); 3) data augmentation to reduce the imbalance in the corpus; 4) training of a bidirectional LSTM model and a transformer model; 5) comparison of the results with those from few shot- and zero-shot based prompt engineering techniques using a pretrained large language model (LLaMa 3). The results showed that LLaMa 3 was more robust than traditional neural networks and transformer models in detecting underrepresented entities. Furthermore, the study demonstrates that data augmentation improves the entity recognition task when fine-tuning deep learning models. The paper illustrates the challenges of ontology-driven entity recognition in real-world datasets and proposes a roadmap to addressing them that is potentially transferable to other real-world domains.
%U https://aclanthology.org/2025.ranlp-1.139/
%P 1205-1214
Markdown (Informal)
[The Challenge of Performing Ontology-driven Entity Extraction in Real-world Unstructured Textual Data from the Domain of Dementia](https://aclanthology.org/2025.ranlp-1.139/) (Suravee et al., RANLP 2025)
ACL