@inproceedings{ali-etal-2025-moz,
title = "{MOZ}-Smishing: A Benchmark Dataset for Detecting Mobile Money Frauds",
author = "Ali, Felermino D. M. A. and
Saide, Saide M. and
Sousa-Silva, Rui and
Lopes Cardoso, Henrique",
editor = "Lignos, Constantine and
Abdulmumin, Idris and
Adelani, David",
booktitle = "Proceedings of the Sixth Workshop on African Natural Language Processing (AfricaNLP 2025)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.africanlp-1.23/",
doi = "10.18653/v1/2025.africanlp-1.23",
pages = "158--166",
ISBN = "979-8-89176-257-2",
abstract = "Despite the increasing prevalence of smishing attacks targeting Mobile Money Transfer systems, there is a notable lack of publicly available SMS phishing datasets in this domain. This study seeks to address this gap by creating a specialized dataset designed to detect smishing attacks aimed at Mobile Money Transfer users. The data set consists of crowd-sourced text messages from Mozambican mobile users, meticulously annotated into two categories: legitimate messages (ham) and fraudulent smishing attempts (spam). The messages are written in Portuguese, often incorporating microtext styles and linguistic nuances unique to the Mozambican context. We also investigate the effectiveness of LLMs in detecting smishing. Using in-context learning approaches, we evaluate the models' ability to identify smishing attempts without requiring extensive task-specific training. The data set is released under an open license at the following link: huggingface-Anonymous"
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ali-etal-2025-moz">
<titleInfo>
<title>MOZ-Smishing: A Benchmark Dataset for Detecting Mobile Money Frauds</title>
</titleInfo>
<name type="personal">
<namePart type="given">Felermino</namePart>
<namePart type="given">D</namePart>
<namePart type="given">M</namePart>
<namePart type="given">A</namePart>
<namePart type="family">Ali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Saide</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Saide</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rui</namePart>
<namePart type="family">Sousa-Silva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Henrique</namePart>
<namePart type="family">Lopes Cardoso</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Sixth Workshop on African Natural Language Processing (AfricaNLP 2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Constantine</namePart>
<namePart type="family">Lignos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Idris</namePart>
<namePart type="family">Abdulmumin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Adelani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-257-2</identifier>
</relatedItem>
<abstract>Despite the increasing prevalence of smishing attacks targeting Mobile Money Transfer systems, there is a notable lack of publicly available SMS phishing datasets in this domain. This study seeks to address this gap by creating a specialized dataset designed to detect smishing attacks aimed at Mobile Money Transfer users. The data set consists of crowd-sourced text messages from Mozambican mobile users, meticulously annotated into two categories: legitimate messages (ham) and fraudulent smishing attempts (spam). The messages are written in Portuguese, often incorporating microtext styles and linguistic nuances unique to the Mozambican context. We also investigate the effectiveness of LLMs in detecting smishing. Using in-context learning approaches, we evaluate the models’ ability to identify smishing attempts without requiring extensive task-specific training. The data set is released under an open license at the following link: huggingface-Anonymous</abstract>
<identifier type="citekey">ali-etal-2025-moz</identifier>
<identifier type="doi">10.18653/v1/2025.africanlp-1.23</identifier>
<location>
<url>https://aclanthology.org/2025.africanlp-1.23/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>158</start>
<end>166</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T MOZ-Smishing: A Benchmark Dataset for Detecting Mobile Money Frauds
%A Ali, Felermino D. M. A.
%A Saide, Saide M.
%A Sousa-Silva, Rui
%A Lopes Cardoso, Henrique
%Y Lignos, Constantine
%Y Abdulmumin, Idris
%Y Adelani, David
%S Proceedings of the Sixth Workshop on African Natural Language Processing (AfricaNLP 2025)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-257-2
%F ali-etal-2025-moz
%X Despite the increasing prevalence of smishing attacks targeting Mobile Money Transfer systems, there is a notable lack of publicly available SMS phishing datasets in this domain. This study seeks to address this gap by creating a specialized dataset designed to detect smishing attacks aimed at Mobile Money Transfer users. The data set consists of crowd-sourced text messages from Mozambican mobile users, meticulously annotated into two categories: legitimate messages (ham) and fraudulent smishing attempts (spam). The messages are written in Portuguese, often incorporating microtext styles and linguistic nuances unique to the Mozambican context. We also investigate the effectiveness of LLMs in detecting smishing. Using in-context learning approaches, we evaluate the models’ ability to identify smishing attempts without requiring extensive task-specific training. The data set is released under an open license at the following link: huggingface-Anonymous
%R 10.18653/v1/2025.africanlp-1.23
%U https://aclanthology.org/2025.africanlp-1.23/
%U https://doi.org/10.18653/v1/2025.africanlp-1.23
%P 158-166
Markdown (Informal)
[MOZ-Smishing: A Benchmark Dataset for Detecting Mobile Money Frauds](https://aclanthology.org/2025.africanlp-1.23/) (Ali et al., AfricaNLP 2025)
ACL