@inproceedings{gorelick-etal-2021-syntax,
title = "Syntax and Themes: How Context Free Grammar Rules and Semantic Word Association Influence Book Success",
author = "Gorelick, Henry and
Bijoy, Biddut Sarker and
Jannatus Saba, Syeda and
Kar, Sudipta and
Islam, Md Saiful and
Amin, Mohammad Ruhul",
editor = "Mitkov, Ruslan and
Angelova, Galia",
booktitle = "Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021)",
month = sep,
year = "2021",
address = "Held Online",
publisher = "INCOMA Ltd.",
url = "https://aclanthology.org/2021.ranlp-1.53",
pages = "463--474",
abstract = "In this paper, we attempt to improve upon the state-of-the-art in predicting a novel{'}s success by modeling the lexical semantic relationships of its contents. We created the largest dataset used in such a project containing lexical data from 17,962 books from Project Gutenberg. We utilized domain-specific feature reduction techniques to implement the most accurate models to date for predicting book success, with our best model achieving an average accuracy of 94.0{\%}. By analyzing the model parameters, we extracted the successful semantic relationships from books of 12 different genres. We finally mapped those semantic relations to a set of themes, as defined in Roget{'}s Thesaurus, and discovered the themes that successful books of a given genre prioritize. At the end of the paper, we further showed that our model demonstrates similar performance for book success prediction even when Goodreads rating was used instead of download count to measure success.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="gorelick-etal-2021-syntax">
<titleInfo>
<title>Syntax and Themes: How Context Free Grammar Rules and Semantic Word Association Influence Book Success</title>
</titleInfo>
<name type="personal">
<namePart type="given">Henry</namePart>
<namePart type="family">Gorelick</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Biddut</namePart>
<namePart type="given">Sarker</namePart>
<namePart type="family">Bijoy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Syeda</namePart>
<namePart type="family">Jannatus Saba</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sudipta</namePart>
<namePart type="family">Kar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Md</namePart>
<namePart type="given">Saiful</namePart>
<namePart type="family">Islam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Ruhul</namePart>
<namePart type="family">Amin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Galia</namePart>
<namePart type="family">Angelova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd.</publisher>
<place>
<placeTerm type="text">Held Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we attempt to improve upon the state-of-the-art in predicting a novel’s success by modeling the lexical semantic relationships of its contents. We created the largest dataset used in such a project containing lexical data from 17,962 books from Project Gutenberg. We utilized domain-specific feature reduction techniques to implement the most accurate models to date for predicting book success, with our best model achieving an average accuracy of 94.0%. By analyzing the model parameters, we extracted the successful semantic relationships from books of 12 different genres. We finally mapped those semantic relations to a set of themes, as defined in Roget’s Thesaurus, and discovered the themes that successful books of a given genre prioritize. At the end of the paper, we further showed that our model demonstrates similar performance for book success prediction even when Goodreads rating was used instead of download count to measure success.</abstract>
<identifier type="citekey">gorelick-etal-2021-syntax</identifier>
<location>
<url>https://aclanthology.org/2021.ranlp-1.53</url>
</location>
<part>
<date>2021-09</date>
<extent unit="page">
<start>463</start>
<end>474</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Syntax and Themes: How Context Free Grammar Rules and Semantic Word Association Influence Book Success
%A Gorelick, Henry
%A Bijoy, Biddut Sarker
%A Jannatus Saba, Syeda
%A Kar, Sudipta
%A Islam, Md Saiful
%A Amin, Mohammad Ruhul
%Y Mitkov, Ruslan
%Y Angelova, Galia
%S Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021)
%D 2021
%8 September
%I INCOMA Ltd.
%C Held Online
%F gorelick-etal-2021-syntax
%X In this paper, we attempt to improve upon the state-of-the-art in predicting a novel’s success by modeling the lexical semantic relationships of its contents. We created the largest dataset used in such a project containing lexical data from 17,962 books from Project Gutenberg. We utilized domain-specific feature reduction techniques to implement the most accurate models to date for predicting book success, with our best model achieving an average accuracy of 94.0%. By analyzing the model parameters, we extracted the successful semantic relationships from books of 12 different genres. We finally mapped those semantic relations to a set of themes, as defined in Roget’s Thesaurus, and discovered the themes that successful books of a given genre prioritize. At the end of the paper, we further showed that our model demonstrates similar performance for book success prediction even when Goodreads rating was used instead of download count to measure success.
%U https://aclanthology.org/2021.ranlp-1.53
%P 463-474
Markdown (Informal)
[Syntax and Themes: How Context Free Grammar Rules and Semantic Word Association Influence Book Success](https://aclanthology.org/2021.ranlp-1.53) (Gorelick et al., RANLP 2021)
ACL