@inproceedings{thongtan-phienthrakul-2019-sentiment,
    title = "Sentiment Classification Using Document Embeddings Trained with Cosine Similarity",
    author = "Thongtan, Tan and
      Phienthrakul, Tanasanee",
    editor = "Alva-Manchego, Fernando and
      Choi, Eunsol and
      Khashabi, Daniel",
    booktitle = "Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics: Student Research Workshop",
    month = jul,
    year = "2019",
    address = "Florence, Italy",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/P19-2057",
    doi = "10.18653/v1/P19-2057",
    pages = "407--414",
    abstract = "In document-level sentiment classification, each document must be mapped to a fixed-length vector. Document embedding models map each document to a dense, low-dimensional vector in continuous vector space. This paper proposes training document embeddings using cosine similarity instead of dot product. Experiments on the IMDB dataset show that accuracy is improved when using cosine similarity compared to using dot product, while using feature combination with Naive Bayes weighted bag of n-grams achieves a competitive accuracy of 93.68{\%}. Code to reproduce all experiments is available at \url{https://github.com/tanthongtan/dv-cosine}.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="thongtan-phienthrakul-2019-sentiment">
    <titleInfo>
      <title>Sentiment Classification Using Document Embeddings Trained with Cosine Similarity</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Tan</namePart>
      <namePart type="family">Thongtan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Tanasanee</namePart>
      <namePart type="family">Phienthrakul</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2019-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics: Student Research Workshop</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Fernando</namePart>
        <namePart type="family">Alva-Manchego</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Eunsol</namePart>
        <namePart type="family">Choi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Daniel</namePart>
        <namePart type="family">Khashabi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Florence, Italy</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>In document-level sentiment classification, each document must be mapped to a fixed-length vector. Document embedding models map each document to a dense, low-dimensional vector in continuous vector space. This paper proposes training document embeddings using cosine similarity instead of dot product. Experiments on the IMDB dataset show that accuracy is improved when using cosine similarity compared to using dot product, while using feature combination with Naive Bayes weighted bag of n-grams achieves a competitive accuracy of 93.68%. Code to reproduce all experiments is available at https://github.com/tanthongtan/dv-cosine.</abstract>
<identifier type="citekey">thongtan-phienthrakul-2019-sentiment</identifier>
<identifier type="doi">10.18653/v1/P19-2057</identifier>
<location>
<url>https://aclanthology.org/P19-2057</url>
</location>
<part>
<date>2019-07</date>
<extent unit="page">
<start>407</start>
<end>414</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Sentiment Classification Using Document Embeddings Trained with Cosine Similarity
%A Thongtan, Tan
%A Phienthrakul, Tanasanee
%Y Alva-Manchego, Fernando
%Y Choi, Eunsol
%Y Khashabi, Daniel
%S Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics: Student Research Workshop
%D 2019
%8 July
%I Association for Computational Linguistics
%C Florence, Italy
%F thongtan-phienthrakul-2019-sentiment
%X In document-level sentiment classification, each document must be mapped to a fixed-length vector. Document embedding models map each document to a dense, low-dimensional vector in continuous vector space. This paper proposes training document embeddings using cosine similarity instead of dot product. Experiments on the IMDB dataset show that accuracy is improved when using cosine similarity compared to using dot product, while using feature combination with Naive Bayes weighted bag of n-grams achieves a competitive accuracy of 93.68%. Code to reproduce all experiments is available at https://github.com/tanthongtan/dv-cosine.
%R 10.18653/v1/P19-2057
%U https://aclanthology.org/P19-2057
%U https://doi.org/10.18653/v1/P19-2057
%P 407-414
Markdown (Informal)
[Sentiment Classification Using Document Embeddings Trained with Cosine Similarity](https://aclanthology.org/P19-2057) (Thongtan & Phienthrakul, ACL 2019)
ACL
Tan Thongtan and Tanasanee Phienthrakul. 2019. Sentiment Classification Using Document Embeddings Trained with Cosine Similarity. In Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics: Student Research Workshop, pages 407–414, Florence, Italy. Association for Computational Linguistics.