% Workshop paper in the WSSANLP 2016 proceedings; proper nouns and acronyms are brace-protected as whole words.
@inproceedings{abdul-hameed-etal-2016-automatic,
  title     = {Automatic Creation of a Sentence Aligned {Sinhala}-{Tamil} Parallel Corpus},
  author    = {Abdul Hameed, Riyafa and
               Pathirennehelage, Nadeeshani and
               Ihalapathirana, Anusha and
               Ziyad Mohamed, Maryam and
               Ranathunga, Surangika and
               Jayasena, Sanath and
               Dias, Gihan and
               Fernando, Sandareka},
  editor    = {Wu, Dekai and
               Bhattacharyya, Pushpak},
  booktitle = {Proceedings of the 6th Workshop on South and Southeast {Asian} Natural Language Processing ({WSSANLP}2016)},
  month     = dec,
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  url       = {https://aclanthology.org/W16-3713},
  pages     = {124--132},
  abstract  = {A sentence aligned parallel corpus is an important prerequisite in statistical machine translation. However, manual creation of such a parallel corpus is time consuming, and requires experts fluent in both languages. Automatic creation of a sentence aligned parallel corpus using parallel text is the solution to this problem. In this paper, we present the first ever empirical evaluation carried out to identify the best method to automatically create a sentence aligned Sinhala-Tamil parallel corpus. Annual reports from Sri Lankan government institutions were used as the parallel text for aligning. Despite both Sinhala and Tamil being under-resourced languages, we were able to achieve an F-score value of 0.791 using a hybrid approach that makes use of a bilingual dictionary.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<!-- MODS v3 record for one paper; the mods ID is the same value as the citekey identifier further down. -->
<mods ID="abdul-hameed-etal-2016-automatic">
<titleInfo>
<title>Automatic Creation of a Sentence Aligned Sinhala-Tamil Parallel Corpus</title>
</titleInfo>
<!-- Authors: one personal name element each, split into given and family parts. -->
<name type="personal">
<namePart type="given">Riyafa</namePart>
<namePart type="family">Abdul Hameed</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nadeeshani</namePart>
<namePart type="family">Pathirennehelage</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anusha</namePart>
<namePart type="family">Ihalapathirana</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maryam</namePart>
<namePart type="family">Ziyad Mohamed</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Surangika</namePart>
<namePart type="family">Ranathunga</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sanath</namePart>
<namePart type="family">Jayasena</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gihan</namePart>
<namePart type="family">Dias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sandareka</namePart>
<namePart type="family">Fernando</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<!-- Issue date of the paper, given as year and month. -->
<originInfo>
<dateIssued>2016-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<!-- Host item: the proceedings volume, with its editors, publisher and place. -->
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 6th Workshop on South and Southeast Asian Natural Language Processing (WSSANLP2016)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dekai</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pushpak</namePart>
<namePart type="family">Bhattacharyya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>The COLING 2016 Organizing Committee</publisher>
<place>
<placeTerm type="text">Osaka, Japan</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>A sentence aligned parallel corpus is an important prerequisite in statistical machine translation. However, manual creation of such a parallel corpus is time consuming, and requires experts fluent in both languages. Automatic creation of a sentence aligned parallel corpus using parallel text is the solution to this problem. In this paper, we present the first ever empirical evaluation carried out to identify the best method to automatically create a sentence aligned Sinhala-Tamil parallel corpus. Annual reports from Sri Lankan government institutions were used as the parallel text for aligning. Despite both Sinhala and Tamil being under-resourced languages, we were able to achieve an F-score value of 0.791 using a hybrid approach that makes use of a bilingual dictionary.</abstract>
<identifier type="citekey">abdul-hameed-etal-2016-automatic</identifier>
<location>
<url>https://aclanthology.org/W16-3713</url>
</location>
<!-- Position of the paper within the host item: date and page extent. -->
<part>
<date>2016-12</date>
<extent unit="page">
<start>124</start>
<end>132</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Automatic Creation of a Sentence Aligned Sinhala-Tamil Parallel Corpus
%A Abdul Hameed, Riyafa
%A Pathirennehelage, Nadeeshani
%A Ihalapathirana, Anusha
%A Ziyad Mohamed, Maryam
%A Ranathunga, Surangika
%A Jayasena, Sanath
%A Dias, Gihan
%A Fernando, Sandareka
%Y Wu, Dekai
%Y Bhattacharyya, Pushpak
%S Proceedings of the 6th Workshop on South and Southeast Asian Natural Language Processing (WSSANLP2016)
%D 2016
%8 December
%I The COLING 2016 Organizing Committee
%C Osaka, Japan
%F abdul-hameed-etal-2016-automatic
%X A sentence aligned parallel corpus is an important prerequisite in statistical machine translation. However, manual creation of such a parallel corpus is time consuming, and requires experts fluent in both languages. Automatic creation of a sentence aligned parallel corpus using parallel text is the solution to this problem. In this paper, we present the first ever empirical evaluation carried out to identify the best method to automatically create a sentence aligned Sinhala-Tamil parallel corpus. Annual reports from Sri Lankan government institutions were used as the parallel text for aligning. Despite both Sinhala and Tamil being under-resourced languages, we were able to achieve an F-score value of 0.791 using a hybrid approach that makes use of a bilingual dictionary.
%U https://aclanthology.org/W16-3713
%P 124-132
Markdown (Informal)
[Automatic Creation of a Sentence Aligned Sinhala-Tamil Parallel Corpus](https://aclanthology.org/W16-3713) (Abdul Hameed et al., WSSANLP 2016)
ACL
- Riyafa Abdul Hameed, Nadeeshani Pathirennehelage, Anusha Ihalapathirana, Maryam Ziyad Mohamed, Surangika Ranathunga, Sanath Jayasena, Gihan Dias, and Sandareka Fernando. 2016. Automatic Creation of a Sentence Aligned Sinhala-Tamil Parallel Corpus. In Proceedings of the 6th Workshop on South and Southeast Asian Natural Language Processing (WSSANLP2016), pages 124–132, Osaka, Japan. The COLING 2016 Organizing Committee.