% Conference paper record (ACL Anthology export) for the IndoNLP 2025 shared-task paper.
% Case-sensitive words in the title are brace-protected as whole words so that
% sentence-casing bibliography styles keep their capitalisation.
% NOTE(review): `address` appears to hold the conference venue city rather than the
% publisher's city -- confirm before mapping it to a biblatex `location`/`venue` field.
@inproceedings{perera-etal-2025-indonlp,
  title     = {{IndoNLP} 2025 Shared Task: {Romanized} {Sinhala} to {Sinhala} Reverse Transliteration Using {BERT}},
  author    = {Perera, Sandun Sameera and
               Jayakodi, Lahiru Prabhath and
               Sumanathilaka, Deshan Koshala and
               Anuradha, Isuri},
  editor    = {Weerasinghe, Ruvan and
               Anuradha, Isuri and
               Sumanathilaka, Deshan},
  booktitle = {Proceedings of the First Workshop on Natural Language Processing for Indo-Aryan and Dravidian Languages},
  month     = jan,
  year      = {2025},
  address   = {Abu Dhabi},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.indonlp-1.16/},
  pages     = {135--140},
  abstract  = {The Romanized text has become popular with the growth of digital communication platforms, largely due to the familiarity with English keyboards. In Sri Lanka, Romanized Sinhala, commonly referred to as {\textquotedblleft}Singlish{\textquotedblright} is widely used in digital communications. This paper introduces a novel context-aware back-transliteration system designed to address the ad-hoc typing patterns and lexical ambiguity inherent in Singlish. The proposed system combines dictionary-based mapping for Singlish words, a rule-based transliteration for out-of-vocabulary words and a BERT-based language model for addressing lexical ambiguities. Evaluation results demonstrate the robustness of the proposed approach, achieving high BLEU scores along with low Word Error Rate (WER) and Character Error Rate (CER) across test datasets. This study provides an effective solution for Romanized Sinhala back-transliteration and establishes the foundation for improving NLP tools for similar low-resourced languages.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="perera-etal-2025-indonlp">
<titleInfo>
<title>IndoNLP 2025 Shared Task: Romanized Sinhala to Sinhala Reverse Transliteration Using BERT</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sandun</namePart>
<namePart type="given">Sameera</namePart>
<namePart type="family">Perera</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lahiru</namePart>
<namePart type="given">Prabhath</namePart>
<namePart type="family">Jayakodi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Deshan</namePart>
<namePart type="given">Koshala</namePart>
<namePart type="family">Sumanathilaka</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Isuri</namePart>
<namePart type="family">Anuradha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Natural Language Processing for Indo-Aryan and Dravidian Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ruvan</namePart>
<namePart type="family">Weerasinghe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Isuri</namePart>
<namePart type="family">Anuradha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Deshan</namePart>
<namePart type="family">Sumanathilaka</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The Romanized text has become popular with the growth of digital communication platforms, largely due to the familiarity with English keyboards. In Sri Lanka, Romanized Sinhala, commonly referred to as “Singlish” is widely used in digital communications. This paper introduces a novel context-aware back-transliteration system designed to address the ad-hoc typing patterns and lexical ambiguity inherent in Singlish. The proposed system combines dictionary-based mapping for Singlish words, a rule-based transliteration for out-of-vocabulary words and a BERT-based language model for addressing lexical ambiguities. Evaluation results demonstrate the robustness of the proposed approach, achieving high BLEU scores along with low Word Error Rate (WER) and Character Error Rate (CER) across test datasets. This study provides an effective solution for Romanized Sinhala back-transliteration and establishes the foundation for improving NLP tools for similar low-resourced languages.</abstract>
<identifier type="citekey">perera-etal-2025-indonlp</identifier>
<location>
<url>https://aclanthology.org/2025.indonlp-1.16/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>135</start>
<end>140</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T IndoNLP 2025 Shared Task: Romanized Sinhala to Sinhala Reverse Transliteration Using BERT
%A Perera, Sandun Sameera
%A Jayakodi, Lahiru Prabhath
%A Sumanathilaka, Deshan Koshala
%A Anuradha, Isuri
%Y Weerasinghe, Ruvan
%Y Anuradha, Isuri
%Y Sumanathilaka, Deshan
%S Proceedings of the First Workshop on Natural Language Processing for Indo-Aryan and Dravidian Languages
%D 2025
%8 January
%I Association for Computational Linguistics
%C Abu Dhabi
%F perera-etal-2025-indonlp
%X The Romanized text has become popular with the growth of digital communication platforms, largely due to the familiarity with English keyboards. In Sri Lanka, Romanized Sinhala, commonly referred to as “Singlish” is widely used in digital communications. This paper introduces a novel context-aware back-transliteration system designed to address the ad-hoc typing patterns and lexical ambiguity inherent in Singlish. The proposed system combines dictionary-based mapping for Singlish words, a rule-based transliteration for out-of-vocabulary words and a BERT-based language model for addressing lexical ambiguities. Evaluation results demonstrate the robustness of the proposed approach, achieving high BLEU scores along with low Word Error Rate (WER) and Character Error Rate (CER) across test datasets. This study provides an effective solution for Romanized Sinhala back-transliteration and establishes the foundation for improving NLP tools for similar low-resourced languages.
%U https://aclanthology.org/2025.indonlp-1.16/
%P 135-140
Markdown (Informal)
[IndoNLP 2025 Shared Task: Romanized Sinhala to Sinhala Reverse Transliteration Using BERT](https://aclanthology.org/2025.indonlp-1.16/) (Perera et al., IndoNLP 2025)
ACL