@inproceedings{lozoya-etal-2024-truth,
title = "Truth in the Noise: Unveiling Authentic Dementia Self-Disclosure Statements in Social Media with {LLM}s",
author = "Lozoya, Daniel Cabrera and
Mikal, Jude P and
Wong, Yun Leng and
Hemmy, Laura S and
Conway, Mike",
editor = "Baldwin, Tim and
Rodr{\'i}guez M{\'e}ndez, Sergio Jos{\'e} and
Kuo, Nicholas",
booktitle = "Proceedings of the 22nd Annual Workshop of the Australasian Language Technology Association",
month = dec,
year = "2024",
address = "Canberra, Australia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.alta-1.16/",
pages = "189--196",
abstract = "Identifying self-disclosed health diagnoses in social media data using regular expressions (e.g. {\textquotedblleft}I`ve been diagnosed with {\ensuremath{<}}Disease X{\ensuremath{>}}{\textquotedblright}) is a well-established approach for creating ad hoc cohorts of individuals with specific health conditions. However there is evidence to suggest that this method of identifying individuals is unreliable when creating cohorts for some mental health and neurodegenerative conditions. In the case of dementia, the focus of this paper, diagnostic disclosures are frequently whimsical or sardonic, rather than indicative of an authentic diagnosis or underlying disease state (e.g. {\textquotedblleft}I forgot my keys again. I`ve got dementia!{\textquotedblright}). With this work and utilising an annotated corpus of 14,025 dementia diagnostic self-disclosure posts derived from Twitter, we leveraged LLMs to distinguish between {\textquotedblleft}authentic{\textquotedblright} dementia self-disclosures and {\textquotedblleft}inauthentic{\textquotedblright} self-disclosures. Specifically, we implemented a genetic algorithm that evolves prompts using various state-of-the-art prompt engineering techniques, including chain of thought, self-critique, generated knowledge, and expert prompting. Our results showed that, of the methods tested, the evolved self-critique prompt engineering method achieved the best result, with an F1-score of 0.8."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="lozoya-etal-2024-truth">
<titleInfo>
<title>Truth in the Noise: Unveiling Authentic Dementia Self-Disclosure Statements in Social Media with LLMs</title>
</titleInfo>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="given">Cabrera</namePart>
<namePart type="family">Lozoya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jude</namePart>
<namePart type="given">P</namePart>
<namePart type="family">Mikal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun</namePart>
<namePart type="given">Leng</namePart>
<namePart type="family">Wong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Laura</namePart>
<namePart type="given">S</namePart>
<namePart type="family">Hemmy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mike</namePart>
<namePart type="family">Conway</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 22nd Annual Workshop of the Australasian Language Technology Association</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tim</namePart>
<namePart type="family">Baldwin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sergio</namePart>
<namePart type="given">José</namePart>
<namePart type="family">Rodríguez Méndez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nicholas</namePart>
<namePart type="family">Kuo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Canberra, Australia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Identifying self-disclosed health diagnoses in social media data using regular expressions (e.g. “I‘ve been diagnosed with \ensuremath<Disease X\ensuremath>”) is a well-established approach for creating ad hoc cohorts of individuals with specific health conditions. However there is evidence to suggest that this method of identifying individuals is unreliable when creating cohorts for some mental health and neurodegenerative conditions. In the case of dementia, the focus of this paper, diagnostic disclosures are frequently whimsical or sardonic, rather than indicative of an authentic diagnosis or underlying disease state (e.g. “I forgot my keys again. I‘ve got dementia!”). With this work and utilising an annotated corpus of 14,025 dementia diagnostic self-disclosure posts derived from Twitter, we leveraged LLMs to distinguish between “authentic” dementia self-disclosures and “inauthentic” self-disclosures. Specifically, we implemented a genetic algorithm that evolves prompts using various state-of-the-art prompt engineering techniques, including chain of thought, self-critique, generated knowledge, and expert prompting. Our results showed that, of the methods tested, the evolved self-critique prompt engineering method achieved the best result, with an F1-score of 0.8.</abstract>
<identifier type="citekey">lozoya-etal-2024-truth</identifier>
<location>
<url>https://aclanthology.org/2024.alta-1.16/</url>
</location>
<part>
<date>2024-12</date>
<extent unit="page">
<start>189</start>
<end>196</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Truth in the Noise: Unveiling Authentic Dementia Self-Disclosure Statements in Social Media with LLMs
%A Lozoya, Daniel Cabrera
%A Mikal, Jude P.
%A Wong, Yun Leng
%A Hemmy, Laura S.
%A Conway, Mike
%Y Baldwin, Tim
%Y Rodríguez Méndez, Sergio José
%Y Kuo, Nicholas
%S Proceedings of the 22nd Annual Workshop of the Australasian Language Technology Association
%D 2024
%8 December
%I Association for Computational Linguistics
%C Canberra, Australia
%F lozoya-etal-2024-truth
%X Identifying self-disclosed health diagnoses in social media data using regular expressions (e.g. “I‘ve been diagnosed with \ensuremath<Disease X\ensuremath>”) is a well-established approach for creating ad hoc cohorts of individuals with specific health conditions. However there is evidence to suggest that this method of identifying individuals is unreliable when creating cohorts for some mental health and neurodegenerative conditions. In the case of dementia, the focus of this paper, diagnostic disclosures are frequently whimsical or sardonic, rather than indicative of an authentic diagnosis or underlying disease state (e.g. “I forgot my keys again. I‘ve got dementia!”). With this work and utilising an annotated corpus of 14,025 dementia diagnostic self-disclosure posts derived from Twitter, we leveraged LLMs to distinguish between “authentic” dementia self-disclosures and “inauthentic” self-disclosures. Specifically, we implemented a genetic algorithm that evolves prompts using various state-of-the-art prompt engineering techniques, including chain of thought, self-critique, generated knowledge, and expert prompting. Our results showed that, of the methods tested, the evolved self-critique prompt engineering method achieved the best result, with an F1-score of 0.8.
%U https://aclanthology.org/2024.alta-1.16/
%P 189-196
Markdown (Informal)
[Truth in the Noise: Unveiling Authentic Dementia Self-Disclosure Statements in Social Media with LLMs](https://aclanthology.org/2024.alta-1.16/) (Lozoya et al., ALTA 2024)
ACL