% Conference paper in the RANLP 2017 Student Research Workshop proceedings.
% Proper nouns and acronyms in titles are protected as whole braced words so
% that sentence-casing styles keep their capitalisation.
% NOTE(review): address is "Varna" -- presumably the conference venue rather
% than the publisher's city; confirm before normalising.
@inproceedings{rohanian-2017-multi,
  author    = {Rohanian, Morteza},
  title     = {Multi-Document Summarization of {Persian} Text using Paragraph Vectors},
  editor    = {Kovatchev, Venelin and
               Temnikova, Irina and
               Gencheva, Pepa and
               Kiprov, Yasen and
               Nikolova, Ivelina},
  booktitle = {Proceedings of the Student Research Workshop Associated with {RANLP} 2017},
  month     = sep,
  year      = {2017},
  address   = {Varna},
  publisher = {INCOMA Ltd.},
  pages     = {35--40},
  doi       = {10.26615/issn.1314-9156.2017_005},
  url       = {https://doi.org/10.26615/issn.1314-9156.2017_005},
  abstract  = {A multi-document summarizer finds the key topics from multiple textual sources and organizes information around them. In this paper we propose a summarization method for Persian text using paragraph vectors that can represent textual units of arbitrary lengths. We use these vectors to calculate the semantic relatedness between documents, cluster them to a number of predetermined groups, weight them based on their distance to the centroids and the intra-cluster homogeneity and take out the key paragraphs. We compare the final summaries with the gold-standard summaries of 21 digital topics using the ROUGE evaluation metric. Experimental results show the advantages of using paragraph vectors over earlier attempts at developing similar methods for a low resource language like Persian.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for citekey rohanian-2017-multi: the paper itself, followed by its host proceedings. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="rohanian-2017-multi">
    <!-- Paper-level metadata -->
    <titleInfo>
      <title>Multi-Document Summarization of Persian Text using Paragraph Vectors</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Morteza</namePart>
      <namePart type="family">Rohanian</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2017-09</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors, and its publisher -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Student Research Workshop Associated with RANLP 2017</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Venelin</namePart>
        <namePart type="family">Kovatchev</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Irina</namePart>
        <namePart type="family">Temnikova</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Pepa</namePart>
        <namePart type="family">Gencheva</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yasen</namePart>
        <namePart type="family">Kiprov</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ivelina</namePart>
        <namePart type="family">Nikolova</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>INCOMA Ltd.</publisher>
        <place>
          <placeTerm type="text">Varna</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>A multi-document summarizer finds the key topics from multiple textual sources and organizes information around them. In this paper we propose a summarization method for Persian text using paragraph vectors that can represent textual units of arbitrary lengths. We use these vectors to calculate the semantic relatedness between documents, cluster them to a number of predetermined groups, weight them based on their distance to the centroids and the intra-cluster homogeneity and take out the key paragraphs. We compare the final summaries with the gold-standard summaries of 21 digital topics using the ROUGE evaluation metric. Experimental results show the advantages of using paragraph vectors over earlier attempts at developing similar methods for a low resource language like Persian.</abstract>
    <!-- Identifiers and page extent within the host volume -->
    <identifier type="citekey">rohanian-2017-multi</identifier>
    <identifier type="doi">10.26615/issn.1314-9156.2017_005</identifier>
    <part>
      <date>2017-09</date>
      <extent unit="page">
        <start>35</start>
        <end>40</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Multi-Document Summarization of Persian Text using Paragraph Vectors
%A Rohanian, Morteza
%Y Kovatchev, Venelin
%Y Temnikova, Irina
%Y Gencheva, Pepa
%Y Kiprov, Yasen
%Y Nikolova, Ivelina
%S Proceedings of the Student Research Workshop Associated with RANLP 2017
%D 2017
%8 September
%I INCOMA Ltd.
%C Varna
%F rohanian-2017-multi
%X A multi-document summarizer finds the key topics from multiple textual sources and organizes information around them. In this paper we propose a summarization method for Persian text using paragraph vectors that can represent textual units of arbitrary lengths. We use these vectors to calculate the semantic relatedness between documents, cluster them to a number of predetermined groups, weight them based on their distance to the centroids and the intra-cluster homogeneity and take out the key paragraphs. We compare the final summaries with the gold-standard summaries of 21 digital topics using the ROUGE evaluation metric. Experimental results show the advantages of using paragraph vectors over earlier attempts at developing similar methods for a low resource language like Persian.
%R 10.26615/issn.1314-9156.2017_005
%U https://doi.org/10.26615/issn.1314-9156.2017_005
%P 35-40
Markdown (Informal)
[Multi-Document Summarization of Persian Text using Paragraph Vectors](https://doi.org/10.26615/issn.1314-9156.2017_005) (Rohanian, RANLP 2017)
ACL