@inproceedings{abdelhalim-etal-2023-training,
title = "Training Models on Oversampled Data and a Novel Multi-class Annotation Scheme for Dementia Detection",
author = "Abdelhalim, Nadine and
Abdelhalim, Ingy and
Batista-Navarro, Riza",
editor = "Naumann, Tristan and
Ben Abacha, Asma and
Bethard, Steven and
Roberts, Kirk and
Rumshisky, Anna",
booktitle = "Proceedings of the 5th Clinical Natural Language Processing Workshop",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.clinicalnlp-1.15",
doi = "10.18653/v1/2023.clinicalnlp-1.15",
pages = "118--124",
abstract = "This work introduces a novel three-class annotation scheme for text-based dementia classification in patients, based on their recorded visit interactions. Multiple models were developed utilising BERT, RoBERTa and DistilBERT. Two approaches were employed to improve the representation of dementia samples: oversampling the underrepresented data points in the original Pitt dataset and combining the Pitt with the Holland and Kempler datasets. The DistilBERT models trained on either an oversampled Pitt dataset or the combined dataset performed best in classifying the dementia class. Specifically, the model trained on the oversampled Pitt dataset and the one trained on the combined dataset obtained state-of-the-art performance with 98.8{\%} overall accuracy and 98.6{\%} macro-averaged F1-score, respectively. The models{'} outputs were manually inspected through saliency highlighting, using Local Interpretable Model-agnostic Explanations (LIME), to provide a better understanding of its predictions.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="abdelhalim-etal-2023-training">
    <titleInfo>
      <title>Training Models on Oversampled Data and a Novel Multi-class Annotation Scheme for Dementia Detection</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Nadine</namePart>
      <namePart type="family">Abdelhalim</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ingy</namePart>
      <namePart type="family">Abdelhalim</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Riza</namePart>
      <namePart type="family">Batista-Navarro</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2023-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 5th Clinical Natural Language Processing Workshop</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Tristan</namePart>
        <namePart type="family">Naumann</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Asma</namePart>
        <namePart type="family">Ben Abacha</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Steven</namePart>
        <namePart type="family">Bethard</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Kirk</namePart>
        <namePart type="family">Roberts</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Anna</namePart>
        <namePart type="family">Rumshisky</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Toronto, Canada</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>This work introduces a novel three-class annotation scheme for text-based dementia classification in patients, based on their recorded visit interactions. Multiple models were developed utilising BERT, RoBERTa and DistilBERT. Two approaches were employed to improve the representation of dementia samples: oversampling the underrepresented data points in the original Pitt dataset and combining the Pitt with the Holland and Kempler datasets. The DistilBERT models trained on either an oversampled Pitt dataset or the combined dataset performed best in classifying the dementia class. Specifically, the model trained on the oversampled Pitt dataset and the one trained on the combined dataset obtained state-of-the-art performance with 98.8% overall accuracy and 98.6% macro-averaged F1-score, respectively. The models’ outputs were manually inspected through saliency highlighting, using Local Interpretable Model-agnostic Explanations (LIME), to provide a better understanding of their predictions.</abstract>
<identifier type="citekey">abdelhalim-etal-2023-training</identifier>
<identifier type="doi">10.18653/v1/2023.clinicalnlp-1.15</identifier>
<location>
<url>https://aclanthology.org/2023.clinicalnlp-1.15</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>118</start>
<end>124</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Training Models on Oversampled Data and a Novel Multi-class Annotation Scheme for Dementia Detection
%A Abdelhalim, Nadine
%A Abdelhalim, Ingy
%A Batista-Navarro, Riza
%Y Naumann, Tristan
%Y Ben Abacha, Asma
%Y Bethard, Steven
%Y Roberts, Kirk
%Y Rumshisky, Anna
%S Proceedings of the 5th Clinical Natural Language Processing Workshop
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F abdelhalim-etal-2023-training
%X This work introduces a novel three-class annotation scheme for text-based dementia classification in patients, based on their recorded visit interactions. Multiple models were developed utilising BERT, RoBERTa and DistilBERT. Two approaches were employed to improve the representation of dementia samples: oversampling the underrepresented data points in the original Pitt dataset and combining the Pitt with the Holland and Kempler datasets. The DistilBERT models trained on either an oversampled Pitt dataset or the combined dataset performed best in classifying the dementia class. Specifically, the model trained on the oversampled Pitt dataset and the one trained on the combined dataset obtained state-of-the-art performance with 98.8% overall accuracy and 98.6% macro-averaged F1-score, respectively. The models’ outputs were manually inspected through saliency highlighting, using Local Interpretable Model-agnostic Explanations (LIME), to provide a better understanding of their predictions.
%R 10.18653/v1/2023.clinicalnlp-1.15
%U https://aclanthology.org/2023.clinicalnlp-1.15
%U https://doi.org/10.18653/v1/2023.clinicalnlp-1.15
%P 118-124
Markdown (Informal)
[Training Models on Oversampled Data and a Novel Multi-class Annotation Scheme for Dementia Detection](https://aclanthology.org/2023.clinicalnlp-1.15) (Abdelhalim et al., ClinicalNLP 2023)
ACL
Nadine Abdelhalim, Ingy Abdelhalim, and Riza Batista-Navarro. 2023. Training Models on Oversampled Data and a Novel Multi-class Annotation Scheme for Dementia Detection. In Proceedings of the 5th Clinical Natural Language Processing Workshop, pages 118–124, Toronto, Canada. Association for Computational Linguistics.
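As a rough illustration of the workflow the abstract describes (oversampling the underrepresented dementia class, then inspecting a fine-tuned transformer's predictions with LIME saliency highlighting), the sketch below shows the general shape of such code. It is not the authors' implementation: the model checkpoint, class names, example texts, labels, and the predict_proba helper are all placeholder assumptions.

# Hypothetical sketch only: the checkpoint name, class names, texts and labels
# below are placeholders, not the paper's data or its fine-tuned three-class
# DistilBERT model.
import numpy as np
from sklearn.utils import resample
from lime.lime_text import LimeTextExplainer
from transformers import pipeline

# Oversampling: duplicate minority-class transcripts with replacement so the
# dementia class is no longer underrepresented (toy data shown here).
texts = ["transcript one", "transcript two", "transcript three"]
labels = [0, 0, 1]
minority = [t for t, y in zip(texts, labels) if y == 1]
extra = resample(minority, replace=True, n_samples=2, random_state=42)
train_texts = texts + extra

# A text-classification pipeline; top_k=None returns scores for every class.
clf = pipeline("text-classification",
               model="distilbert-base-uncased",  # placeholder checkpoint
               top_k=None)

def predict_proba(batch):
    """Adapt pipeline output to the (n_samples, n_classes) array LIME expects."""
    outs = clf(list(batch))
    # Sort scores by label name so columns line up across examples.
    return np.array([[s["score"] for s in sorted(out, key=lambda d: d["label"])]
                     for out in outs])

# LIME perturbs the input text, fits a local surrogate model, and reads off
# per-token weights -- the basis of the saliency highlighting in the paper.
explainer = LimeTextExplainer(class_names=["control", "uncertain", "dementia"])  # hypothetical names
exp = explainer.explain_instance("the boy is reaching for the cookie jar",
                                 predict_proba, num_features=6, num_samples=500)
print(exp.as_list())  # [(token, weight), ...]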