@inproceedings{sarker-gonzalez-2017-hlp,
    title = "{HLP}@{UP}enn at {S}em{E}val-2017 Task 4{A}: A simple, self-optimizing text classification system combining dense and sparse vectors",
    author = "Sarker, Abeed  and
      Gonzalez, Graciela",
    editor = "Bethard, Steven  and
      Carpuat, Marine  and
      Apidianaki, Marianna  and
      Mohammad, Saif M.  and
      Cer, Daniel  and
      Jurgens, David",
    booktitle = "Proceedings of the 11th International Workshop on Semantic Evaluation ({S}em{E}val-2017)",
    month = aug,
    year = "2017",
    address = "Vancouver, Canada",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/S17-2105/",
    doi = "10.18653/v1/S17-2105",
    pages = "640--643",
    abstract = "We present a simple supervised text classification system that combines sparse and dense vector representations of words, and generalized representations of words via clusters. The sparse vectors are generated from word n-gram sequences (1-3). The dense vector representations of words (embeddings) are learned by training a neural network to predict neighboring words in a large unlabeled dataset. To classify a text segment, the different representations of it are concatenated, and the classification is performed using Support Vector Machines (SVM). Our system is particularly intended for use by non-experts of natural language processing and machine learning, and, therefore, the system does not require any manual tuning of parameters or weights. Given a training set, the system automatically generates the training vectors, optimizes the relevant hyper-parameters for the SVM classifier, and trains the classification model. We evaluated this system on the SemEval-2017 English sentiment analysis task. In terms of average F1-score, our system obtained 8th position out of 39 submissions (F1-score: 0.632, average recall: 0.637, accuracy: 0.646)."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sarker-gonzalez-2017-hlp">
    <titleInfo>
        <title>HLP@UPenn at SemEval-2017 Task 4A: A simple, self-optimizing text classification system combining dense and sparse vectors</title>
    </titleInfo>
    <name type="personal">
        <namePart type="given">Abeed</namePart>
        <namePart type="family">Sarker</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Graciela</namePart>
        <namePart type="family">Gonzalez</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <originInfo>
        <dateIssued>2017-08</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
        <titleInfo>
            <title>Proceedings of the 11th International Workshop on Semantic Evaluation (SemEval-2017)</title>
        </titleInfo>
        <name type="personal">
            <namePart type="given">Steven</namePart>
            <namePart type="family">Bethard</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Marine</namePart>
            <namePart type="family">Carpuat</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Marianna</namePart>
            <namePart type="family">Apidianaki</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Saif</namePart>
            <namePart type="given">M</namePart>
            <namePart type="family">Mohammad</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Daniel</namePart>
            <namePart type="family">Cer</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">David</namePart>
            <namePart type="family">Jurgens</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <originInfo>
            <publisher>Association for Computational Linguistics</publisher>
            <place>
                <placeTerm type="text">Vancouver, Canada</placeTerm>
            </place>
        </originInfo>
        <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>We present a simple supervised text classification system that combines sparse and dense vector representations of words, and generalized representations of words via clusters. The sparse vectors are generated from word n-gram sequences (1-3). The dense vector representations of words (embeddings) are learned by training a neural network to predict neighboring words in a large unlabeled dataset. To classify a text segment, the different representations of it are concatenated, and the classification is performed using Support Vector Machines (SVM). Our system is particularly intended for use by non-experts of natural language processing and machine learning, and, therefore, the system does not require any manual tuning of parameters or weights. Given a training set, the system automatically generates the training vectors, optimizes the relevant hyper-parameters for the SVM classifier, and trains the classification model. We evaluated this system on the SemEval-2017 English sentiment analysis task. In terms of average F1-score, our system obtained 8th position out of 39 submissions (F1-score: 0.632, average recall: 0.637, accuracy: 0.646).</abstract>
    <identifier type="citekey">sarker-gonzalez-2017-hlp</identifier>
    <identifier type="doi">10.18653/v1/S17-2105</identifier>
    <location>
        <url>https://aclanthology.org/S17-2105/</url>
    </location>
    <part>
        <date>2017-08</date>
        <extent unit="page">
            <start>640</start>
            <end>643</end>
        </extent>
    </part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T HLP@UPenn at SemEval-2017 Task 4A: A simple, self-optimizing text classification system combining dense and sparse vectors
%A Sarker, Abeed
%A Gonzalez, Graciela
%Y Bethard, Steven
%Y Carpuat, Marine
%Y Apidianaki, Marianna
%Y Mohammad, Saif M.
%Y Cer, Daniel
%Y Jurgens, David
%S Proceedings of the 11th International Workshop on Semantic Evaluation (SemEval-2017)
%D 2017
%8 August
%I Association for Computational Linguistics
%C Vancouver, Canada
%F sarker-gonzalez-2017-hlp
%X We present a simple supervised text classification system that combines sparse and dense vector representations of words, and generalized representations of words via clusters. The sparse vectors are generated from word n-gram sequences (1-3). The dense vector representations of words (embeddings) are learned by training a neural network to predict neighboring words in a large unlabeled dataset. To classify a text segment, the different representations of it are concatenated, and the classification is performed using Support Vector Machines (SVM). Our system is particularly intended for use by non-experts of natural language processing and machine learning, and, therefore, the system does not require any manual tuning of parameters or weights. Given a training set, the system automatically generates the training vectors, optimizes the relevant hyper-parameters for the SVM classifier, and trains the classification model. We evaluated this system on the SemEval-2017 English sentiment analysis task. In terms of average F1-score, our system obtained 8th position out of 39 submissions (F1-score: 0.632, average recall: 0.637, accuracy: 0.646).
%R 10.18653/v1/S17-2105
%U https://aclanthology.org/S17-2105/
%U https://doi.org/10.18653/v1/S17-2105
%P 640-643
Markdown (Informal)
[HLP@UPenn at SemEval-2017 Task 4A: A simple, self-optimizing text classification system combining dense and sparse vectors](https://aclanthology.org/S17-2105/) (Sarker & Gonzalez, SemEval 2017)
ACL
Abeed Sarker and Graciela Gonzalez. 2017. HLP@UPenn at SemEval-2017 Task 4A: A simple, self-optimizing text classification system combining dense and sparse vectors. In Proceedings of the 11th International Workshop on Semantic Evaluation (SemEval-2017), pages 640–643, Vancouver, Canada. Association for Computational Linguistics.
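
The abstract describes concatenating sparse 1-3-gram vectors with dense word embeddings and classifying with an SVM whose hyper-parameters are tuned automatically. Below is a minimal sketch of that idea, not the authors' implementation: the pretrained `embeddings` lookup, the toy data, and the grid of SVM cost values are assumptions, and the cluster-based word representations mentioned in the abstract are omitted.

```python
# Sketch of the abstract's pipeline (assumed reimplementation, not the paper's code):
# sparse 1-3-gram TF-IDF vectors and averaged dense word embeddings are concatenated,
# and an SVM is trained with its cost parameter chosen by cross-validated grid search.
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import FeatureUnion, Pipeline
from sklearn.svm import LinearSVC


class MeanEmbedding(BaseEstimator, TransformerMixin):
    """Dense representation: average the embedding vectors of a text's tokens."""

    def __init__(self, embeddings, dim):
        self.embeddings = embeddings  # assumed: token -> np.ndarray lookup
        self.dim = dim

    def fit(self, X, y=None):
        return self

    def transform(self, X):
        rows = []
        for text in X:
            vecs = [self.embeddings[t] for t in text.lower().split() if t in self.embeddings]
            rows.append(np.mean(vecs, axis=0) if vecs else np.zeros(self.dim))
        return np.vstack(rows)


# Toy stand-ins; in the paper the embeddings come from a neural network trained to
# predict neighboring words on a large unlabeled corpus, and the data from SemEval-2017.
dim = 50
rng = np.random.default_rng(0)
embeddings = {w: rng.standard_normal(dim) for w in ["good", "bad", "movie", "great", "awful"]}
texts = ["great movie", "awful movie", "good", "bad"]
labels = ["positive", "negative", "positive", "negative"]

features = FeatureUnion([
    ("ngrams", TfidfVectorizer(ngram_range=(1, 3))),  # sparse word 1-3-gram vectors
    ("dense", MeanEmbedding(embeddings, dim)),        # averaged word embeddings
])

pipeline = Pipeline([("features", features), ("svm", LinearSVC())])

# "Self-optimizing": the SVM cost parameter is picked by cross-validated grid search,
# so no manual tuning of weights or parameters is required of the user.
search = GridSearchCV(pipeline, {"svm__C": [0.01, 0.1, 1, 10]}, cv=2)
search.fit(texts, labels)
print(search.best_params_, search.predict(["great"]))
```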