@inproceedings{mahata-etal-2017-bucc2017,
title = "{BUCC}2017: A Hybrid Approach for Identifying Parallel Sentences in Comparable Corpora",
author = "Mahata, Sainik and
Das, Dipankar and
Bandyopadhyay, Sivaji",
editor = "Sharoff, Serge and
Zweigenbaum, Pierre and
Rapp, Reinhard",
booktitle = "Proceedings of the 10th Workshop on Building and Using Comparable Corpora",
month = aug,
year = "2017",
address = "Vancouver, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W17-2511",
doi = "10.18653/v1/W17-2511",
pages = "56--59",
abstract = "A Statistical Machine Translation (SMT) system is always trained using large parallel corpus to produce effective translation. Not only is the corpus scarce, it also involves a lot of manual labor and cost. Parallel corpus can be prepared by employing comparable corpora where a pair of corpora is in two different languages pointing to the same domain. In the present work, we try to build a parallel corpus for French-English language pair from a given comparable corpus. The data and the problem set are provided as part of the shared task organized by BUCC 2017. We have proposed a system that first translates the sentences by heavily relying on Moses and then group the sentences based on sentence length similarity. Finally, the one to one sentence selection was done based on Cosine Similarity algorithm.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mahata-etal-2017-bucc2017">
<titleInfo>
<title>BUCC2017: A Hybrid Approach for Identifying Parallel Sentences in Comparable Corpora</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sainik</namePart>
<namePart type="family">Mahata</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dipankar</namePart>
<namePart type="family">Das</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sivaji</namePart>
<namePart type="family">Bandyopadhyay</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 10th Workshop on Building and Using Comparable Corpora</title>
</titleInfo>
<name type="personal">
<namePart type="given">Serge</namePart>
<namePart type="family">Sharoff</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pierre</namePart>
<namePart type="family">Zweigenbaum</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Reinhard</namePart>
<namePart type="family">Rapp</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vancouver, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>A Statistical Machine Translation (SMT) system is always trained using large parallel corpus to produce effective translation. Not only is the corpus scarce, it also involves a lot of manual labor and cost. Parallel corpus can be prepared by employing comparable corpora where a pair of corpora is in two different languages pointing to the same domain. In the present work, we try to build a parallel corpus for French-English language pair from a given comparable corpus. The data and the problem set are provided as part of the shared task organized by BUCC 2017. We have proposed a system that first translates the sentences by heavily relying on Moses and then group the sentences based on sentence length similarity. Finally, the one to one sentence selection was done based on Cosine Similarity algorithm.</abstract>
<identifier type="citekey">mahata-etal-2017-bucc2017</identifier>
<identifier type="doi">10.18653/v1/W17-2511</identifier>
<location>
<url>https://aclanthology.org/W17-2511</url>
</location>
<part>
<date>2017-08</date>
<extent unit="page">
<start>56</start>
<end>59</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T BUCC2017: A Hybrid Approach for Identifying Parallel Sentences in Comparable Corpora
%A Mahata, Sainik
%A Das, Dipankar
%A Bandyopadhyay, Sivaji
%Y Sharoff, Serge
%Y Zweigenbaum, Pierre
%Y Rapp, Reinhard
%S Proceedings of the 10th Workshop on Building and Using Comparable Corpora
%D 2017
%8 August
%I Association for Computational Linguistics
%C Vancouver, Canada
%F mahata-etal-2017-bucc2017
%X A Statistical Machine Translation (SMT) system is always trained using large parallel corpus to produce effective translation. Not only is the corpus scarce, it also involves a lot of manual labor and cost. Parallel corpus can be prepared by employing comparable corpora where a pair of corpora is in two different languages pointing to the same domain. In the present work, we try to build a parallel corpus for French-English language pair from a given comparable corpus. The data and the problem set are provided as part of the shared task organized by BUCC 2017. We have proposed a system that first translates the sentences by heavily relying on Moses and then group the sentences based on sentence length similarity. Finally, the one to one sentence selection was done based on Cosine Similarity algorithm.
%R 10.18653/v1/W17-2511
%U https://aclanthology.org/W17-2511
%U https://doi.org/10.18653/v1/W17-2511
%P 56-59
Markdown (Informal)
[BUCC2017: A Hybrid Approach for Identifying Parallel Sentences in Comparable Corpora](https://aclanthology.org/W17-2511) (Mahata et al., BUCC 2017)
ACL