@inproceedings{ghosal-etal-2018-novelty,
title = "Novelty Goes Deep. A Deep Neural Solution To Document Level Novelty Detection",
author = "Ghosal, Tirthankar and
Edithal, Vignesh and
Ekbal, Asif and
Bhattacharyya, Pushpak and
Tsatsaronis, George and
Chivukula, Srinivasa Satya Sameer Kumar",
editor = "Bender, Emily M. and
Derczynski, Leon and
Isabelle, Pierre",
booktitle = "Proceedings of the 27th International Conference on Computational Linguistics",
month = aug,
year = "2018",
address = "Santa Fe, New Mexico, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/C18-1237/",
pages = "2802--2813",
abstract = "The rapid growth of documents across the web has necessitated finding means of discarding redundant documents and retaining novel ones. Capturing redundancy is challenging as it may involve investigating at a deep semantic level. Techniques for detecting such semantic redundancy at the document level are scarce. In this work we propose a deep Convolutional Neural Networks (CNN) based model to classify a document as novel or redundant with respect to a set of relevant documents already seen by the system. The system is simple and do not require any manual feature engineering. Our novel scheme encodes relevant and relative information from both source and target texts to generate an intermediate representation which we coin as the Relative Document Vector (RDV). The proposed method outperforms the existing state-of-the-art on a document-level novelty detection dataset by a margin of {\ensuremath{\sim}}5{\%} in terms of accuracy. We further demonstrate the effectiveness of our approach on a standard paraphrase detection dataset where paraphrased passages closely resemble to semantically redundant documents."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ghosal-etal-2018-novelty">
<titleInfo>
<title>Novelty Goes Deep. A Deep Neural Solution To Document Level Novelty Detection</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tirthankar</namePart>
<namePart type="family">Ghosal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vignesh</namePart>
<namePart type="family">Edithal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asif</namePart>
<namePart type="family">Ekbal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pushpak</namePart>
<namePart type="family">Bhattacharyya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">George</namePart>
<namePart type="family">Tsatsaronis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Srinivasa</namePart>
<namePart type="given">Satya</namePart>
<namePart type="given">Sameer</namePart>
<namePart type="given">Kumar</namePart>
<namePart type="family">Chivukula</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 27th International Conference on Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Emily</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Bender</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leon</namePart>
<namePart type="family">Derczynski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pierre</namePart>
<namePart type="family">Isabelle</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Santa Fe, New Mexico, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The rapid growth of documents across the web has necessitated finding means of discarding redundant documents and retaining novel ones. Capturing redundancy is challenging as it may involve investigating at a deep semantic level. Techniques for detecting such semantic redundancy at the document level are scarce. In this work we propose a deep Convolutional Neural Networks (CNN) based model to classify a document as novel or redundant with respect to a set of relevant documents already seen by the system. The system is simple and do not require any manual feature engineering. Our novel scheme encodes relevant and relative information from both source and target texts to generate an intermediate representation which we coin as the Relative Document Vector (RDV). The proposed method outperforms the existing state-of-the-art on a document-level novelty detection dataset by a margin of \ensuremath\sim5% in terms of accuracy. We further demonstrate the effectiveness of our approach on a standard paraphrase detection dataset where paraphrased passages closely resemble to semantically redundant documents.</abstract>
<identifier type="citekey">ghosal-etal-2018-novelty</identifier>
<location>
<url>https://aclanthology.org/C18-1237/</url>
</location>
<part>
<date>2018-08</date>
<extent unit="page">
<start>2802</start>
<end>2813</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Novelty Goes Deep. A Deep Neural Solution To Document Level Novelty Detection
%A Ghosal, Tirthankar
%A Edithal, Vignesh
%A Ekbal, Asif
%A Bhattacharyya, Pushpak
%A Tsatsaronis, George
%A Chivukula, Srinivasa Satya Sameer Kumar
%Y Bender, Emily M.
%Y Derczynski, Leon
%Y Isabelle, Pierre
%S Proceedings of the 27th International Conference on Computational Linguistics
%D 2018
%8 August
%I Association for Computational Linguistics
%C Santa Fe, New Mexico, USA
%F ghosal-etal-2018-novelty
%X The rapid growth of documents across the web has necessitated finding means of discarding redundant documents and retaining novel ones. Capturing redundancy is challenging as it may involve investigating at a deep semantic level. Techniques for detecting such semantic redundancy at the document level are scarce. In this work we propose a deep Convolutional Neural Networks (CNN) based model to classify a document as novel or redundant with respect to a set of relevant documents already seen by the system. The system is simple and do not require any manual feature engineering. Our novel scheme encodes relevant and relative information from both source and target texts to generate an intermediate representation which we coin as the Relative Document Vector (RDV). The proposed method outperforms the existing state-of-the-art on a document-level novelty detection dataset by a margin of \ensuremath\sim5% in terms of accuracy. We further demonstrate the effectiveness of our approach on a standard paraphrase detection dataset where paraphrased passages closely resemble to semantically redundant documents.
%U https://aclanthology.org/C18-1237/
%P 2802-2813
Markdown (Informal)
[Novelty Goes Deep. A Deep Neural Solution To Document Level Novelty Detection](https://aclanthology.org/C18-1237/) (Ghosal et al., COLING 2018)
ACL
- Tirthankar Ghosal, Vignesh Edithal, Asif Ekbal, Pushpak Bhattacharyya, George Tsatsaronis, and Srinivasa Satya Sameer Kumar Chivukula. 2018. Novelty Goes Deep. A Deep Neural Solution To Document Level Novelty Detection. In Proceedings of the 27th International Conference on Computational Linguistics, pages 2802–2813, Santa Fe, New Mexico, USA. Association for Computational Linguistics.