@inproceedings{shrestha-2011-alignment,
title = "Alignment of Monolingual Corpus by Reduction of the Search Space",
author = "Shrestha, Prajol",
editor = "Lopez, C{\'e}dric",
booktitle = "Actes de la 18e conf{\'e}rence sur le Traitement Automatique des Langues Naturelles. REncontres jeunes Chercheurs en Informatique pour le Traitement Automatique des Langues",
month = jun,
year = "2011",
address = "Montpellier, France",
publisher = "ATALA",
url = "https://aclanthology.org/2011.jeptalnrecital-recital.5",
pages = "48--56",
abstract = "Monolingual comparable corpora annotated with alignments between text segments (paragraphs, sentences, etc.) based on similarity are used in a wide range of natural language processing applications like plagiarism detection, information retrieval, summarization and so on. The drawback wanting to use them is that there aren{'}t many standard corpora which are aligned. Due to this drawback, the corpus is manually created, which is a time consuming and costly task. In this paper, we propose a method to significantly reduce the search space for manual alignment of the monolingual comparable corpus which in turn makes the alignment process faster and easier. This method can be used in making alignments on different levels of text segments. Using this method we create our own gold corpus aligned on the level of paragraph, which will be used for testing and building our algorithms for automatic alignment. We also present some experiments for the reduction of search space on the basis of stem overlap, word overlap, and cosine similarity measure which help us automatize the process to some extent and reduce human effort for alignment.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="shrestha-2011-alignment">
<titleInfo>
<title>Alignment of Monolingual Corpus by Reduction of the Search Space</title>
</titleInfo>
<name type="personal">
<namePart type="given">Prajol</namePart>
<namePart type="family">Shrestha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2011-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Actes de la 18e conférence sur le Traitement Automatique des Langues Naturelles. REncontres jeunes Chercheurs en Informatique pour le Traitement Automatique des Langues</title>
</titleInfo>
<name type="personal">
<namePart type="given">Cédric</namePart>
<namePart type="family">Lopez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ATALA</publisher>
<place>
<placeTerm type="text">Montpellier, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Monolingual comparable corpora annotated with alignments between text segments (paragraphs, sentences, etc.) based on similarity are used in a wide range of natural language processing applications like plagiarism detection, information retrieval, summarization and so on. The drawback wanting to use them is that there aren’t many standard corpora which are aligned. Due to this drawback, the corpus is manually created, which is a time consuming and costly task. In this paper, we propose a method to significantly reduce the search space for manual alignment of the monolingual comparable corpus which in turn makes the alignment process faster and easier. This method can be used in making alignments on different levels of text segments. Using this method we create our own gold corpus aligned on the level of paragraph, which will be used for testing and building our algorithms for automatic alignment. We also present some experiments for the reduction of search space on the basis of stem overlap, word overlap, and cosine similarity measure which help us automatize the process to some extent and reduce human effort for alignment.</abstract>
<identifier type="citekey">shrestha-2011-alignment</identifier>
<location>
<url>https://aclanthology.org/2011.jeptalnrecital-recital.5</url>
</location>
<part>
<date>2011-06</date>
<extent unit="page">
<start>48</start>
<end>56</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Alignment of Monolingual Corpus by Reduction of the Search Space
%A Shrestha, Prajol
%Y Lopez, Cédric
%S Actes de la 18e conférence sur le Traitement Automatique des Langues Naturelles. REncontres jeunes Chercheurs en Informatique pour le Traitement Automatique des Langues
%D 2011
%8 June
%I ATALA
%C Montpellier, France
%F shrestha-2011-alignment
%X Monolingual comparable corpora annotated with alignments between text segments (paragraphs, sentences, etc.) based on similarity are used in a wide range of natural language processing applications like plagiarism detection, information retrieval, summarization and so on. The drawback wanting to use them is that there aren’t many standard corpora which are aligned. Due to this drawback, the corpus is manually created, which is a time consuming and costly task. In this paper, we propose a method to significantly reduce the search space for manual alignment of the monolingual comparable corpus which in turn makes the alignment process faster and easier. This method can be used in making alignments on different levels of text segments. Using this method we create our own gold corpus aligned on the level of paragraph, which will be used for testing and building our algorithms for automatic alignment. We also present some experiments for the reduction of search space on the basis of stem overlap, word overlap, and cosine similarity measure which help us automatize the process to some extent and reduce human effort for alignment.
%U https://aclanthology.org/2011.jeptalnrecital-recital.5
%P 48-56
Markdown (Informal)
[Alignment of Monolingual Corpus by Reduction of the Search Space](https://aclanthology.org/2011.jeptalnrecital-recital.5) (Shrestha, JEP/TALN/RECITAL 2011)
ACL