@inproceedings{jannah-etal-2025-multilingual,
title = "Multilingual Symptom Detection on Social Media: Enhancing Health-related Fact-checking with {LLM}s",
author = "Jannah, Saidah Zahrotul and
Aco, Elyanah and
Peng, Shaowen and
Wakamiya, Shoko and
Aramaki, Eiji",
editor = "Akhtar, Mubashara and
Aly, Rami and
Christodoulopoulos, Christos and
Cocarascu, Oana and
Guo, Zhijiang and
Mittal, Arpit and
Schlichtkrull, Michael and
Thorne, James and
Vlachos, Andreas",
booktitle = "Proceedings of the Eighth Fact Extraction and VERification Workshop (FEVER)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.fever-1.4/",
doi = "10.18653/v1/2025.fever-1.4",
pages = "54--68",
ISBN = "978-1-959429-53-1",
abstract = "Social media has emerged as a valueable source for early pandemic detection, as repeated mentions of symptoms by users may signal the onset of an outbreak. However, to be a reliable system, validation through fact-checking and verification against official health records is essential. Without this step, systems risk spreading misinformation to the public. The effectiveness of these systems also depend on their ability to process data in multiple languages, given the multilingual nature of social media data.Yet, many NLP datasets and disease surveillance system remain heavily English-centric, leading to significant performance gaps for low-resource languages.This issue is especially critical in Southeast Asia, where symptom expression may vary culturally and linguistically.Therefore, this study evaluates the symptom detection capabilities of LLMs in social media posts across multiple languages, models, and symptoms to enhance health-related fact-checking. Our results reveal significant language-based discrepancies, with European languages outperforming under-resourced Southeast Asian languages. Furthermore, we identify symptom-specific challenges, particularly in detecting respiratory illnesses such as influenza, which LLMs tend to overpredict.The overestimation or misclassification of symptom mentions can lead to false alarms or public misinformation when deployed in real-world settings. This underscores the importance of symptom detection as a critical first step in medical fact-checking within early outbreak detection systems."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="jannah-etal-2025-multilingual">
<titleInfo>
<title>Multilingual Symptom Detection on Social Media: Enhancing Health-related Fact-checking with LLMs</title>
</titleInfo>
<name type="personal">
<namePart type="given">Saidah</namePart>
<namePart type="given">Zahrotul</namePart>
<namePart type="family">Jannah</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elyanah</namePart>
<namePart type="family">Aco</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shaowen</namePart>
<namePart type="family">Peng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shoko</namePart>
<namePart type="family">Wakamiya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eiji</namePart>
<namePart type="family">Aramaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Eighth Fact Extraction and VERification Workshop (FEVER)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mubashara</namePart>
<namePart type="family">Akhtar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rami</namePart>
<namePart type="family">Aly</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christos</namePart>
<namePart type="family">Christodoulopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Oana</namePart>
<namePart type="family">Cocarascu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhijiang</namePart>
<namePart type="family">Guo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arpit</namePart>
<namePart type="family">Mittal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michael</namePart>
<namePart type="family">Schlichtkrull</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">James</namePart>
<namePart type="family">Thorne</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andreas</namePart>
<namePart type="family">Vlachos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">978-1-959429-53-1</identifier>
</relatedItem>
<abstract>Social media has emerged as a valueable source for early pandemic detection, as repeated mentions of symptoms by users may signal the onset of an outbreak. However, to be a reliable system, validation through fact-checking and verification against official health records is essential. Without this step, systems risk spreading misinformation to the public. The effectiveness of these systems also depend on their ability to process data in multiple languages, given the multilingual nature of social media data.Yet, many NLP datasets and disease surveillance system remain heavily English-centric, leading to significant performance gaps for low-resource languages.This issue is especially critical in Southeast Asia, where symptom expression may vary culturally and linguistically.Therefore, this study evaluates the symptom detection capabilities of LLMs in social media posts across multiple languages, models, and symptoms to enhance health-related fact-checking. Our results reveal significant language-based discrepancies, with European languages outperforming under-resourced Southeast Asian languages. Furthermore, we identify symptom-specific challenges, particularly in detecting respiratory illnesses such as influenza, which LLMs tend to overpredict.The overestimation or misclassification of symptom mentions can lead to false alarms or public misinformation when deployed in real-world settings. This underscores the importance of symptom detection as a critical first step in medical fact-checking within early outbreak detection systems.</abstract>
<identifier type="citekey">jannah-etal-2025-multilingual</identifier>
<identifier type="doi">10.18653/v1/2025.fever-1.4</identifier>
<location>
<url>https://aclanthology.org/2025.fever-1.4/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>54</start>
<end>68</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Multilingual Symptom Detection on Social Media: Enhancing Health-related Fact-checking with LLMs
%A Jannah, Saidah Zahrotul
%A Aco, Elyanah
%A Peng, Shaowen
%A Wakamiya, Shoko
%A Aramaki, Eiji
%Y Akhtar, Mubashara
%Y Aly, Rami
%Y Christodoulopoulos, Christos
%Y Cocarascu, Oana
%Y Guo, Zhijiang
%Y Mittal, Arpit
%Y Schlichtkrull, Michael
%Y Thorne, James
%Y Vlachos, Andreas
%S Proceedings of the Eighth Fact Extraction and VERification Workshop (FEVER)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 978-1-959429-53-1
%F jannah-etal-2025-multilingual
%X Social media has emerged as a valueable source for early pandemic detection, as repeated mentions of symptoms by users may signal the onset of an outbreak. However, to be a reliable system, validation through fact-checking and verification against official health records is essential. Without this step, systems risk spreading misinformation to the public. The effectiveness of these systems also depend on their ability to process data in multiple languages, given the multilingual nature of social media data.Yet, many NLP datasets and disease surveillance system remain heavily English-centric, leading to significant performance gaps for low-resource languages.This issue is especially critical in Southeast Asia, where symptom expression may vary culturally and linguistically.Therefore, this study evaluates the symptom detection capabilities of LLMs in social media posts across multiple languages, models, and symptoms to enhance health-related fact-checking. Our results reveal significant language-based discrepancies, with European languages outperforming under-resourced Southeast Asian languages. Furthermore, we identify symptom-specific challenges, particularly in detecting respiratory illnesses such as influenza, which LLMs tend to overpredict.The overestimation or misclassification of symptom mentions can lead to false alarms or public misinformation when deployed in real-world settings. This underscores the importance of symptom detection as a critical first step in medical fact-checking within early outbreak detection systems.
%R 10.18653/v1/2025.fever-1.4
%U https://aclanthology.org/2025.fever-1.4/
%U https://doi.org/10.18653/v1/2025.fever-1.4
%P 54-68
Markdown (Informal)
[Multilingual Symptom Detection on Social Media: Enhancing Health-related Fact-checking with LLMs](https://aclanthology.org/2025.fever-1.4/) (Jannah et al., FEVER 2025)
ACL