@inproceedings{de-mel-etal-2025-sinhala,
title = "{S}inhala Transliteration: A Comparative Analysis Between Rule-based and {S}eq2{S}eq Approaches",
author = "De Mel, Widanalage Mario Yomal and
Wickramasinghe, Kasun Imesha and
de Silva, Nisansa and
Ranathunga, Surangika Dayani",
editor = "Weerasinghe, Ruvan and
Anuradha, Isuri and
Sumanathilaka, Deshan",
booktitle = "Proceedings of the First Workshop on Natural Language Processing for Indo-Aryan and Dravidian Languages",
month = jan,
year = "2025",
address = "Abu Dhabi",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.indonlp-1.19/",
pages = "166--173",
abstract = "Due to reasons of convenience and lack of tech literacy, transliteration (i.e., Romanizing native scripts instead of using localization tools) is eminently prevalent in the context of low-resource languages such as Sinhala, which have their own writing script. In this study, our focus is on Romanized Sinhala transliteration. We propose two methods to address this problem: Our baseline is a rule-based method, which is then compared against our second method where we approach the transliteration problem as a sequence-to-sequence task akin to the established Neural Machine Translation (NMT) task. For the latter, we propose a Transformer-based Encoder-Decoder solution. We witnessed that the Transformer-based method could grab many ad-hoc patterns within the Romanized scripts compared to the rule-based method."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="de-mel-etal-2025-sinhala">
<titleInfo>
<title>Sinhala Transliteration: A Comparative Analysis Between Rule-based and Seq2Seq Approaches</title>
</titleInfo>
<name type="personal">
<namePart type="given">Widanalage</namePart>
<namePart type="given">Mario</namePart>
<namePart type="given">Yomal</namePart>
<namePart type="family">De Mel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kasun</namePart>
<namePart type="given">Imesha</namePart>
<namePart type="family">Wickramasinghe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nisansa</namePart>
<namePart type="family">de Silva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Surangika</namePart>
<namePart type="given">Dayani</namePart>
<namePart type="family">Ranathunga</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Natural Language Processing for Indo-Aryan and Dravidian Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ruvan</namePart>
<namePart type="family">Weerasinghe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Isuri</namePart>
<namePart type="family">Anuradha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Deshan</namePart>
<namePart type="family">Sumanathilaka</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Due to reasons of convenience and lack of tech literacy, transliteration (i.e., Romanizing native scripts instead of using localization tools) is eminently prevalent in the context of low-resource languages such as Sinhala, which have their own writing script. In this study, our focus is on Romanized Sinhala transliteration. We propose two methods to address this problem: Our baseline is a rule-based method, which is then compared against our second method where we approach the transliteration problem as a sequence-to-sequence task akin to the established Neural Machine Translation (NMT) task. For the latter, we propose a Transformer-based Encoder-Decoder solution. We witnessed that the Transformer-based method could grab many ad-hoc patterns within the Romanized scripts compared to the rule-based method.</abstract>
<identifier type="citekey">de-mel-etal-2025-sinhala</identifier>
<location>
<url>https://aclanthology.org/2025.indonlp-1.19/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>166</start>
<end>173</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Sinhala Transliteration: A Comparative Analysis Between Rule-based and Seq2Seq Approaches
%A De Mel, Widanalage Mario Yomal
%A Wickramasinghe, Kasun Imesha
%A de Silva, Nisansa
%A Ranathunga, Surangika Dayani
%Y Weerasinghe, Ruvan
%Y Anuradha, Isuri
%Y Sumanathilaka, Deshan
%S Proceedings of the First Workshop on Natural Language Processing for Indo-Aryan and Dravidian Languages
%D 2025
%8 January
%I Association for Computational Linguistics
%C Abu Dhabi
%F de-mel-etal-2025-sinhala
%X Due to reasons of convenience and lack of tech literacy, transliteration (i.e., Romanizing native scripts instead of using localization tools) is eminently prevalent in the context of low-resource languages such as Sinhala, which have their own writing script. In this study, our focus is on Romanized Sinhala transliteration. We propose two methods to address this problem: Our baseline is a rule-based method, which is then compared against our second method where we approach the transliteration problem as a sequence-to-sequence task akin to the established Neural Machine Translation (NMT) task. For the latter, we propose a Transformer-based Encoder-Decoder solution. We witnessed that the Transformer-based method could grab many ad-hoc patterns within the Romanized scripts compared to the rule-based method.
%U https://aclanthology.org/2025.indonlp-1.19/
%P 166-173
Markdown (Informal)
[Sinhala Transliteration: A Comparative Analysis Between Rule-based and Seq2Seq Approaches](https://aclanthology.org/2025.indonlp-1.19/) (De Mel et al., IndoNLP 2025)
ACL