% Workshop paper, ACL Anthology ID W19-3409 (Proceedings of the Second Workshop on Storytelling).
@inproceedings{qureshi-etal-2019-simple,
    title     = {A Simple Approach to Classify Fictional and Non-Fictional Genres},
    author    = {Qureshi, Mohammed Rameez and
                 Ranjan, Sidharth and
                 Rajkumar, Rajakrishnan and
                 Shah, Kushal},
    editor    = {Ferraro, Francis and
                 Huang, Ting-Hao {`}Kenneth{'} and
                 Lukin, Stephanie M. and
                 Mitchell, Margaret},
    booktitle = {Proceedings of the Second Workshop on Storytelling},
    month     = aug,
    year      = {2019},
    address   = {Florence, Italy},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/W19-3409},
    doi       = {10.18653/v1/W19-3409},
    pages     = {81--89},
    abstract  = {In this work, we deploy a logistic regression classifier to ascertain whether a given document belongs to the fiction or non-fiction genre. For genre identification, previous work had proposed three classes of features, viz., low-level (character-level and token counts), high-level (lexical and syntactic information) and derived features (type-token ratio, average word length or average sentence length). Using the Recursive feature elimination with cross-validation (RFECV) algorithm, we perform feature selection experiments on an exhaustive set of nineteen features (belonging to all the classes mentioned above) extracted from Brown corpus text. As a result, two simple features viz., the ratio of the number of adverbs to adjectives and the number of adjectives to pronouns turn out to be the most significant. Subsequently, our classification experiments aimed towards genre identification of documents from the Brown and Baby BNC corpora demonstrate that the performance of a classifier containing just the two aforementioned features is at par with that of a classifier containing the exhaustive feature set.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="qureshi-etal-2019-simple">
    <!-- The paper: title followed by its four authors, in order. -->
    <titleInfo>
      <title>A Simple Approach to Classify Fictional and Non-Fictional Genres</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Mohammed</namePart>
      <namePart type="given">Rameez</namePart>
      <namePart type="family">Qureshi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sidharth</namePart>
      <namePart type="family">Ranjan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Rajakrishnan</namePart>
      <namePart type="family">Rajkumar</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Kushal</namePart>
      <namePart type="family">Shah</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2019-08</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Second Workshop on Storytelling</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Francis</namePart>
        <namePart type="family">Ferraro</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ting-Hao</namePart>
        <namePart type="given">‘Kenneth’</namePart>
        <namePart type="family">Huang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Stephanie</namePart>
        <namePart type="given">M</namePart>
        <namePart type="family">Lukin</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Margaret</namePart>
        <namePart type="family">Mitchell</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Florence, Italy</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>In this work, we deploy a logistic regression classifier to ascertain whether a given document belongs to the fiction or non-fiction genre. For genre identification, previous work had proposed three classes of features, viz., low-level (character-level and token counts), high-level (lexical and syntactic information) and derived features (type-token ratio, average word length or average sentence length). Using the Recursive feature elimination with cross-validation (RFECV) algorithm, we perform feature selection experiments on an exhaustive set of nineteen features (belonging to all the classes mentioned above) extracted from Brown corpus text. As a result, two simple features viz., the ratio of the number of adverbs to adjectives and the number of adjectives to pronouns turn out to be the most significant. Subsequently, our classification experiments aimed towards genre identification of documents from the Brown and Baby BNC corpora demonstrate that the performance of a classifier containing just the two aforementioned features is at par with that of a classifier containing the exhaustive feature set.</abstract>
    <!-- Identifiers, landing URL, and page range within the host volume. -->
    <identifier type="citekey">qureshi-etal-2019-simple</identifier>
    <identifier type="doi">10.18653/v1/W19-3409</identifier>
    <location>
      <url>https://aclanthology.org/W19-3409</url>
    </location>
    <part>
      <date>2019-08</date>
      <extent unit="page">
        <start>81</start>
        <end>89</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T A Simple Approach to Classify Fictional and Non-Fictional Genres
%A Qureshi, Mohammed Rameez
%A Ranjan, Sidharth
%A Rajkumar, Rajakrishnan
%A Shah, Kushal
%Y Ferraro, Francis
%Y Huang, Ting-Hao ‘Kenneth’
%Y Lukin, Stephanie M.
%Y Mitchell, Margaret
%S Proceedings of the Second Workshop on Storytelling
%D 2019
%8 August
%I Association for Computational Linguistics
%C Florence, Italy
%F qureshi-etal-2019-simple
%X In this work, we deploy a logistic regression classifier to ascertain whether a given document belongs to the fiction or non-fiction genre. For genre identification, previous work had proposed three classes of features, viz., low-level (character-level and token counts), high-level (lexical and syntactic information) and derived features (type-token ratio, average word length or average sentence length). Using the Recursive feature elimination with cross-validation (RFECV) algorithm, we perform feature selection experiments on an exhaustive set of nineteen features (belonging to all the classes mentioned above) extracted from Brown corpus text. As a result, two simple features viz., the ratio of the number of adverbs to adjectives and the number of adjectives to pronouns turn out to be the most significant. Subsequently, our classification experiments aimed towards genre identification of documents from the Brown and Baby BNC corpora demonstrate that the performance of a classifier containing just the two aforementioned features is at par with that of a classifier containing the exhaustive feature set.
%R 10.18653/v1/W19-3409
%U https://aclanthology.org/W19-3409
%U https://doi.org/10.18653/v1/W19-3409
%P 81-89
Markdown (Informal)
[A Simple Approach to Classify Fictional and Non-Fictional Genres](https://aclanthology.org/W19-3409) (Qureshi et al., Story-NLP 2019)
ACL