@inproceedings{lehal-goyal-2020-extracting,
title = "{EXTRACTING} {PARALLEL} {PHRASES} {FROM} {COMPARABLE} {ENGLISH} {AND} {PUNJABI} {CORPORA} {USING} {AN} {INTEGRATED} {APPROACH}",
author = "Lehal, Manpreet Singh and
Goyal, Vishal",
editor = "Goyal, Vishal and
Ekbal, Asif",
booktitle = "Proceedings of the 17th International Conference on Natural Language Processing (ICON): System Demonstrations",
month = dec,
year = "2020",
address = "Patna, India",
publisher = "NLP Association of India (NLPAI)",
url = "https://aclanthology.org/2020.icon-demos.4",
pages = "10--12",
abstract = "Machine translation from English to Indian languages is always a difficult task due to the unavailability of a good quality corpus and morphological richness in the Indian languages. For a system to produce better translations, the size of the corpus should be huge. We have employed three similarity and distance measures for the research and developed a software to extract parallel data from comparable corpora automatically with high precision using minimal resources. The software works upon four algorithms. The three algorithms have been used for finding Cosine Similarity, Euclidean Distance Similarity and Jaccard Similarity. The fourth algorithm is to integrate the outputs of the three algorithms in order to improve the efficiency of the system.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="lehal-goyal-2020-extracting">
<titleInfo>
<title>EXTRACTING PARALLEL PHRASES FROM COMPARABLE ENGLISH AND PUNJABI CORPORA USING AN INTEGRATED APPROACH</title>
</titleInfo>
<name type="personal">
<namePart type="given">Manpreet</namePart>
<namePart type="given">Singh</namePart>
<namePart type="family">Lehal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vishal</namePart>
<namePart type="family">Goyal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 17th International Conference on Natural Language Processing (ICON): System Demonstrations</title>
</titleInfo>
<name type="personal">
<namePart type="given">Vishal</namePart>
<namePart type="family">Goyal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asif</namePart>
<namePart type="family">Ekbal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>NLP Association of India (NLPAI)</publisher>
<place>
<placeTerm type="text">Patna, India</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Machine translation from English to Indian languages is always a difficult task due to the unavailability of a good quality corpus and morphological richness in the Indian languages. For a system to produce better translations, the size of the corpus should be huge. We have employed three similarity and distance measures for the research and developed a software to extract parallel data from comparable corpora automatically with high precision using minimal resources. The software works upon four algorithms. The three algorithms have been used for finding Cosine Similarity, Euclidean Distance Similarity and Jaccard Similarity. The fourth algorithm is to integrate the outputs of the three algorithms in order to improve the efficiency of the system.</abstract>
<identifier type="citekey">lehal-goyal-2020-extracting</identifier>
<location>
<url>https://aclanthology.org/2020.icon-demos.4</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>10</start>
<end>12</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T EXTRACTING PARALLEL PHRASES FROM COMPARABLE ENGLISH AND PUNJABI CORPORA USING AN INTEGRATED APPROACH
%A Lehal, Manpreet Singh
%A Goyal, Vishal
%Y Goyal, Vishal
%Y Ekbal, Asif
%S Proceedings of the 17th International Conference on Natural Language Processing (ICON): System Demonstrations
%D 2020
%8 December
%I NLP Association of India (NLPAI)
%C Patna, India
%F lehal-goyal-2020-extracting
%X Machine translation from English to Indian languages is always a difficult task due to the unavailability of a good quality corpus and morphological richness in the Indian languages. For a system to produce better translations, the size of the corpus should be huge. We have employed three similarity and distance measures for the research and developed a software to extract parallel data from comparable corpora automatically with high precision using minimal resources. The software works upon four algorithms. The three algorithms have been used for finding Cosine Similarity, Euclidean Distance Similarity and Jaccard Similarity. The fourth algorithm is to integrate the outputs of the three algorithms in order to improve the efficiency of the system.
%U https://aclanthology.org/2020.icon-demos.4
%P 10-12
Markdown (Informal)
[EXTRACTING PARALLEL PHRASES FROM COMPARABLE ENGLISH AND PUNJABI CORPORA USING AN INTEGRATED APPROACH](https://aclanthology.org/2020.icon-demos.4) (Lehal & Goyal, ICON 2020)
ACL