% Workshop paper entry. Case-sensitive words in the title and the "MT"
% acronym in booktitle are brace-protected as whole words so that styles
% which apply sentence casing keep their capitalisation. The abstract is
% reproduced unchanged from the exported record.
@inproceedings{laskar-etal-2020-enascorp1,
  title     = {{EnAsCorp1.0}: {English}-{Assamese} Corpus},
  author    = {Laskar, Sahinur Rahman and
               Khilji, Abdullah Faiz Ur Rahman and
               Pakray, Partha and
               Bandyopadhyay, Sivaji},
  editor    = {Karakanta, Alina and
               Ojha, Atul Kr. and
               Liu, Chao-Hong and
               Abbott, Jade and
               Ortega, John and
               Washington, Jonathan and
               Oco, Nathaniel and
               Lakew, Surafel Melaku and
               Pirinen, Tommi A and
               Malykh, Valentin and
               Logacheva, Varvara and
               Zhao, Xiaobing},
  booktitle = {Proceedings of the 3rd Workshop on Technologies for {MT} of Low Resource Languages},
  month     = dec,
  year      = {2020},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2020.loresmt-1.9},
  doi       = {10.18653/v1/2020.loresmt-1.9},
  pages     = {62--68},
  abstract  = {The corpus preparation is one of the important challenging task for the domain of machine translation especially in low resource language scenarios. Country like India where multiple languages exists, machine translation attempts to minimize the communication gap among people with different linguistic backgrounds. Although Google Translation covers automatic translation of various languages all over the world but it lags in some languages including Assamese. In this paper, we have developed EnAsCorp1.0, corpus of English-Assamese low resource pair where parallel and monolingual data are collected from various online sources. We have also implemented baseline systems with statistical machine translation and neural machine translation approaches for the same corpus.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<!-- MODS record for the paper whose citekey is laskar-etal-2020-enascorp1. -->
<mods ID="laskar-etal-2020-enascorp1">
<titleInfo>
<title>EnAsCorp1.0: English-Assamese Corpus</title>
</titleInfo>
<!-- Paper authors (roleTerm "author"), one name element per person; each given-name word is its own namePart. -->
<name type="personal">
<namePart type="given">Sahinur</namePart>
<namePart type="given">Rahman</namePart>
<namePart type="family">Laskar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abdullah</namePart>
<namePart type="given">Faiz</namePart>
<namePart type="given">Ur</namePart>
<namePart type="given">Rahman</namePart>
<namePart type="family">Khilji</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Partha</namePart>
<namePart type="family">Pakray</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sivaji</namePart>
<namePart type="family">Bandyopadhyay</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<!-- Issue date of the paper, given as year and month. -->
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<!-- Host item: the proceedings volume containing this paper, followed by its editors (roleTerm "editor"). -->
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 3rd Workshop on Technologies for MT of Low Resource Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Alina</namePart>
<namePart type="family">Karakanta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Atul</namePart>
<namePart type="given">Kr.</namePart>
<namePart type="family">Ojha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chao-Hong</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jade</namePart>
<namePart type="family">Abbott</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">John</namePart>
<namePart type="family">Ortega</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jonathan</namePart>
<namePart type="family">Washington</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nathaniel</namePart>
<namePart type="family">Oco</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Surafel</namePart>
<namePart type="given">Melaku</namePart>
<namePart type="family">Lakew</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tommi</namePart>
<namePart type="given">A</namePart>
<namePart type="family">Pirinen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Valentin</namePart>
<namePart type="family">Malykh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Varvara</namePart>
<namePart type="family">Logacheva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaobing</namePart>
<namePart type="family">Zhao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<!-- Publisher and place recorded for the host proceedings volume. -->
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<!-- Abstract of the paper; same text as the abstract field of the BibTeX export. -->
<abstract>The corpus preparation is one of the important challenging task for the domain of machine translation especially in low resource language scenarios. Country like India where multiple languages exists, machine translation attempts to minimize the communication gap among people with different linguistic backgrounds. Although Google Translation covers automatic translation of various languages all over the world but it lags in some languages including Assamese. In this paper, we have developed EnAsCorp1.0, corpus of English-Assamese low resource pair where parallel and monolingual data are collected from various online sources. We have also implemented baseline systems with statistical machine translation and neural machine translation approaches for the same corpus.</abstract>
<!-- Identifiers (citation key and bare DOI) and the landing-page URL. -->
<identifier type="citekey">laskar-etal-2020-enascorp1</identifier>
<identifier type="doi">10.18653/v1/2020.loresmt-1.9</identifier>
<location>
<url>https://aclanthology.org/2020.loresmt-1.9</url>
</location>
<!-- Position of the paper within the host volume: date and first/last page. -->
<part>
<date>2020-12</date>
<extent unit="page">
<start>62</start>
<end>68</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T EnAsCorp1.0: English-Assamese Corpus
%A Laskar, Sahinur Rahman
%A Khilji, Abdullah Faiz Ur Rahman
%A Pakray, Partha
%A Bandyopadhyay, Sivaji
%Y Karakanta, Alina
%Y Ojha, Atul Kr.
%Y Liu, Chao-Hong
%Y Abbott, Jade
%Y Ortega, John
%Y Washington, Jonathan
%Y Oco, Nathaniel
%Y Lakew, Surafel Melaku
%Y Pirinen, Tommi A.
%Y Malykh, Valentin
%Y Logacheva, Varvara
%Y Zhao, Xiaobing
%S Proceedings of the 3rd Workshop on Technologies for MT of Low Resource Languages
%D 2020
%8 December
%I Association for Computational Linguistics
%C Suzhou, China
%F laskar-etal-2020-enascorp1
%X The corpus preparation is one of the important challenging task for the domain of machine translation especially in low resource language scenarios. Country like India where multiple languages exists, machine translation attempts to minimize the communication gap among people with different linguistic backgrounds. Although Google Translation covers automatic translation of various languages all over the world but it lags in some languages including Assamese. In this paper, we have developed EnAsCorp1.0, corpus of English-Assamese low resource pair where parallel and monolingual data are collected from various online sources. We have also implemented baseline systems with statistical machine translation and neural machine translation approaches for the same corpus.
%R 10.18653/v1/2020.loresmt-1.9
%U https://aclanthology.org/2020.loresmt-1.9
%U https://doi.org/10.18653/v1/2020.loresmt-1.9
%P 62-68
Markdown (Informal)
[EnAsCorp1.0: English-Assamese Corpus](https://aclanthology.org/2020.loresmt-1.9) (Laskar et al., LoResMT 2020)
ACL
- Sahinur Rahman Laskar, Abdullah Faiz Ur Rahman Khilji, Partha Pakray, and Sivaji Bandyopadhyay. 2020. EnAsCorp1.0: English-Assamese Corpus. In Proceedings of the 3rd Workshop on Technologies for MT of Low Resource Languages, pages 62–68, Suzhou, China. Association for Computational Linguistics.