@inproceedings{perera-sumanathilaka-2025-low,
title = "A Low-Resource Speech-Driven {NLP} Pipeline for {S}inhala Dyslexia Assistance",
author = "Perera, Peshala Sandali and
Sumanathilaka, Deshan Koshala",
editor = "Angelova, Galia and
Kunilovskaya, Maria and
Escribe, Marie and
Mitkov, Ruslan",
booktitle = "Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era",
month = sep,
year = "2025",
address = "Varna, Bulgaria",
publisher = "INCOMA Ltd., Shoumen, Bulgaria",
url = "https://aclanthology.org/2025.ranlp-1.106/",
pages = "925--933",
abstract = "Dyslexia in adults remains an under-researched and under-served area, particularly in non-English-speaking contexts, despite its significant impact on personal and professional lives. This work addresses that gap by focusing on Sinhala, a low-resource language with limited tools for linguistic accessibility. We present an assistive system designed specifically for Sinhala-speaking adults with dyslexia. The system integrates Whisper for speech-to-text conversion, SinBERT (an open-source fine-tuned BERT model trained for Sinhala) to identify common dyslexic errors, and a combined mT5 and Mistral-based model to generate corrected text. Finally, the output is converted back to speech using gTTS, creating a complete multimodal feedback loop. Despite the challenges posed by limited Sinhala-language datasets, the system achieves 66{\%} transcription accuracy and 70{\%} correction accuracy with 65{\%} overall system accuracy. These results demonstrate both the feasibility and effectiveness of the approach. Ultimately, this work highlights the importance of inclusive NLP technologies in underrepresented languages and showcases a practical step toward improving accessibility for adult dyslexic users."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="perera-sumanathilaka-2025-low">
<titleInfo>
<title>A Low-Resource Speech-Driven NLP Pipeline for Sinhala Dyslexia Assistance</title>
</titleInfo>
<name type="personal">
<namePart type="given">Peshala</namePart>
<namePart type="given">Sandali</namePart>
<namePart type="family">Perera</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Deshan</namePart>
<namePart type="given">Koshala</namePart>
<namePart type="family">Sumanathilaka</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era</title>
</titleInfo>
<name type="personal">
<namePart type="given">Galia</namePart>
<namePart type="family">Angelova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Kunilovskaya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marie</namePart>
<namePart type="family">Escribe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd., Shoumen, Bulgaria</publisher>
<place>
<placeTerm type="text">Varna, Bulgaria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Dyslexia in adults remains an under-researched and under-served area, particularly in non-English-speaking contexts, despite its significant impact on personal and professional lives. This work addresses that gap by focusing on Sinhala, a low-resource language with limited tools for linguistic accessibility. We present an assistive system designed specifically for Sinhala-speaking adults with dyslexia. The system integrates Whisper for speech-to-text conversion, SinBERT (an open-source fine-tuned BERT model trained for Sinhala) to identify common dyslexic errors, and a combined mT5 and Mistral-based model to generate corrected text. Finally, the output is converted back to speech using gTTS, creating a complete multimodal feedback loop. Despite the challenges posed by limited Sinhala-language datasets, the system achieves 66% transcription accuracy and 70% correction accuracy with 65% overall system accuracy. These results demonstrate both the feasibility and effectiveness of the approach. Ultimately, this work highlights the importance of inclusive NLP technologies in underrepresented languages and showcases a practical step toward improving accessibility for adult dyslexic users.</abstract>
<identifier type="citekey">perera-sumanathilaka-2025-low</identifier>
<location>
<url>https://aclanthology.org/2025.ranlp-1.106/</url>
</location>
<part>
<date>2025-09</date>
<extent unit="page">
<start>925</start>
<end>933</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Low-Resource Speech-Driven NLP Pipeline for Sinhala Dyslexia Assistance
%A Perera, Peshala Sandali
%A Sumanathilaka, Deshan Koshala
%Y Angelova, Galia
%Y Kunilovskaya, Maria
%Y Escribe, Marie
%Y Mitkov, Ruslan
%S Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era
%D 2025
%8 September
%I INCOMA Ltd., Shoumen, Bulgaria
%C Varna, Bulgaria
%F perera-sumanathilaka-2025-low
%X Dyslexia in adults remains an under-researched and under-served area, particularly in non-English-speaking contexts, despite its significant impact on personal and professional lives. This work addresses that gap by focusing on Sinhala, a low-resource language with limited tools for linguistic accessibility. We present an assistive system designed specifically for Sinhala-speaking adults with dyslexia. The system integrates Whisper for speech-to-text conversion, SinBERT (an open-source fine-tuned BERT model trained for Sinhala) to identify common dyslexic errors, and a combined mT5 and Mistral-based model to generate corrected text. Finally, the output is converted back to speech using gTTS, creating a complete multimodal feedback loop. Despite the challenges posed by limited Sinhala-language datasets, the system achieves 66% transcription accuracy and 70% correction accuracy with 65% overall system accuracy. These results demonstrate both the feasibility and effectiveness of the approach. Ultimately, this work highlights the importance of inclusive NLP technologies in underrepresented languages and showcases a practical step toward improving accessibility for adult dyslexic users.
%U https://aclanthology.org/2025.ranlp-1.106/
%P 925-933
Markdown (Informal)
[A Low-Resource Speech-Driven NLP Pipeline for Sinhala Dyslexia Assistance](https://aclanthology.org/2025.ranlp-1.106/) (Perera & Sumanathilaka, RANLP 2025)
ACL