@inproceedings{cardon-grabar-2020-reducing,
title = "Reducing the Search Space for Parallel Sentences in Comparable Corpora",
author = "Cardon, R{\'e}mi and
Grabar, Natalia",
booktitle = "Proceedings of the 13th Workshop on Building and Using Comparable Corpora",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2020.bucc-1.7",
pages = "44--48",
abstract = "This paper describes and evaluates simple techniques for reducing the search space for parallel sentences in monolingual comparable corpora. Initially, when searching for parallel sentences between two comparable documents, all the possible sentence pairs between the documents have to be considered, which introduces a great degree of imbalance between parallel pairs and non-parallel pairs. This is a problem because even with a high-performing algorithm, a lot of noise will be present in the extracted results, thus introducing a need for an extensive and costly manual check phase. We work on a manually annotated subset obtained from a French comparable corpus and show how we can drastically reduce the number of sentence pairs that have to be fed to a classifier so that the results can be manually handled.",
language = "English",
ISBN = "979-10-95546-42-9",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="cardon-grabar-2020-reducing">
<titleInfo>
<title>Reducing the Search Space for Parallel Sentences in Comparable Corpora</title>
</titleInfo>
<name type="personal">
<namePart type="given">Rémi</namePart>
<namePart type="family">Cardon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Natalia</namePart>
<namePart type="family">Grabar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 13th Workshop on Building and Using Comparable Corpora</title>
</titleInfo>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-42-9</identifier>
</relatedItem>
<abstract>This paper describes and evaluates simple techniques for reducing the search space for parallel sentences in monolingual comparable corpora. Initially, when searching for parallel sentences between two comparable documents, all the possible sentence pairs between the documents have to be considered, which introduces a great degree of imbalance between parallel pairs and non-parallel pairs. This is a problem because even with a high-performing algorithm, a lot of noise will be present in the extracted results, thus introducing a need for an extensive and costly manual check phase. We work on a manually annotated subset obtained from a French comparable corpus and show how we can drastically reduce the number of sentence pairs that have to be fed to a classifier so that the results can be manually handled.</abstract>
<identifier type="citekey">cardon-grabar-2020-reducing</identifier>
<location>
<url>https://aclanthology.org/2020.bucc-1.7</url>
</location>
<part>
<date>2020-05</date>
<extent unit="page">
<start>44</start>
<end>48</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Reducing the Search Space for Parallel Sentences in Comparable Corpora
%A Cardon, Rémi
%A Grabar, Natalia
%S Proceedings of the 13th Workshop on Building and Using Comparable Corpora
%D 2020
%8 May
%I European Language Resources Association
%C Marseille, France
%@ 979-10-95546-42-9
%G English
%F cardon-grabar-2020-reducing
%X This paper describes and evaluates simple techniques for reducing the search space for parallel sentences in monolingual comparable corpora. Initially, when searching for parallel sentences between two comparable documents, all the possible sentence pairs between the documents have to be considered, which introduces a great degree of imbalance between parallel pairs and non-parallel pairs. This is a problem because even with a high-performing algorithm, a lot of noise will be present in the extracted results, thus introducing a need for an extensive and costly manual check phase. We work on a manually annotated subset obtained from a French comparable corpus and show how we can drastically reduce the number of sentence pairs that have to be fed to a classifier so that the results can be manually handled.
%U https://aclanthology.org/2020.bucc-1.7
%P 44-48
Markdown (Informal)
[Reducing the Search Space for Parallel Sentences in Comparable Corpora](https://aclanthology.org/2020.bucc-1.7) (Cardon & Grabar, BUCC 2020)
ACL