@inproceedings{kugathasan-sumathipala-2021-neural,
title = "Neural Machine Translation for {S}inhala-{E}nglish Code-Mixed Text",
author = "Kugathasan, Archchana and
Sumathipala, Sagara",
editor = "Mitkov, Ruslan and
Angelova, Galia",
booktitle = "Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021)",
month = sep,
year = "2021",
address = "Held Online",
publisher = "INCOMA Ltd.",
url = "https://aclanthology.org/2021.ranlp-1.82",
pages = "718--726",
abstract = "Code-mixing has become a moving method of communication among multilingual speakers. Most of the social media content of the multilingual societies are written in code-mixed text. However, most of the current translation systems neglect to convert code-mixed texts to a standard language. Most of the user written code-mixed content in social media remains unprocessed due to the unavailability of linguistic resource such as parallel corpus. This paper proposes a Neural Machine Translation(NMT) model to translate the Sinhala-English code-mixed text to the Sinhala language. Due to the limited resources available for Sinhala-English code-mixed(SECM) text, a parallel corpus is created with SECM sentences and Sinhala sentences. Srilankan social media sites contain SECM texts more frequently than the standard languages. The model proposed for code-mixed text translation in this study is a combination of Encoder-Decoder framework with LSTM units and Teachers Forcing Algorithm. The translated sentences from the model are evaluated using BLEU(Bilingual Evaluation Understudy) metric. Our model achieved a remarkable BLEU score for the translation.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kugathasan-sumathipala-2021-neural">
<titleInfo>
<title>Neural Machine Translation for Sinhala-English Code-Mixed Text</title>
</titleInfo>
<name type="personal">
<namePart type="given">Archchana</namePart>
<namePart type="family">Kugathasan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sagara</namePart>
<namePart type="family">Sumathipala</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Galia</namePart>
<namePart type="family">Angelova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd.</publisher>
<place>
<placeTerm type="text">Held Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Code-mixing has become a moving method of communication among multilingual speakers. Most of the social media content of the multilingual societies are written in code-mixed text. However, most of the current translation systems neglect to convert code-mixed texts to a standard language. Most of the user written code-mixed content in social media remains unprocessed due to the unavailability of linguistic resource such as parallel corpus. This paper proposes a Neural Machine Translation(NMT) model to translate the Sinhala-English code-mixed text to the Sinhala language. Due to the limited resources available for Sinhala-English code-mixed(SECM) text, a parallel corpus is created with SECM sentences and Sinhala sentences. Srilankan social media sites contain SECM texts more frequently than the standard languages. The model proposed for code-mixed text translation in this study is a combination of Encoder-Decoder framework with LSTM units and Teachers Forcing Algorithm. The translated sentences from the model are evaluated using BLEU(Bilingual Evaluation Understudy) metric. Our model achieved a remarkable BLEU score for the translation.</abstract>
<identifier type="citekey">kugathasan-sumathipala-2021-neural</identifier>
<location>
<url>https://aclanthology.org/2021.ranlp-1.82</url>
</location>
<part>
<date>2021-09</date>
<extent unit="page">
<start>718</start>
<end>726</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Neural Machine Translation for Sinhala-English Code-Mixed Text
%A Kugathasan, Archchana
%A Sumathipala, Sagara
%Y Mitkov, Ruslan
%Y Angelova, Galia
%S Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021)
%D 2021
%8 September
%I INCOMA Ltd.
%C Held Online
%F kugathasan-sumathipala-2021-neural
%X Code-mixing has become a moving method of communication among multilingual speakers. Most of the social media content of the multilingual societies are written in code-mixed text. However, most of the current translation systems neglect to convert code-mixed texts to a standard language. Most of the user written code-mixed content in social media remains unprocessed due to the unavailability of linguistic resource such as parallel corpus. This paper proposes a Neural Machine Translation(NMT) model to translate the Sinhala-English code-mixed text to the Sinhala language. Due to the limited resources available for Sinhala-English code-mixed(SECM) text, a parallel corpus is created with SECM sentences and Sinhala sentences. Srilankan social media sites contain SECM texts more frequently than the standard languages. The model proposed for code-mixed text translation in this study is a combination of Encoder-Decoder framework with LSTM units and Teachers Forcing Algorithm. The translated sentences from the model are evaluated using BLEU(Bilingual Evaluation Understudy) metric. Our model achieved a remarkable BLEU score for the translation.
%U https://aclanthology.org/2021.ranlp-1.82
%P 718-726
Markdown (Informal)
[Neural Machine Translation for Sinhala-English Code-Mixed Text](https://aclanthology.org/2021.ranlp-1.82) (Kugathasan & Sumathipala, RANLP 2021)
ACL