@inproceedings{satthar-etal-2017-calibration,
title = "A Calibration Method for Evaluation of Sentiment Analysis",
author = "Satthar, F. Sharmila and
Evans, Roger and
Uchyigit, Gulden",
editor = "Mitkov, Ruslan and
Angelova, Galia",
booktitle = "Proceedings of the International Conference Recent Advances in Natural Language Processing, {RANLP} 2017",
month = sep,
year = "2017",
address = "Varna, Bulgaria",
publisher = "INCOMA Ltd.",
url = "https://doi.org/10.26615/978-954-452-049-6_084",
doi = "10.26615/978-954-452-049-6_084",
pages = "652--660",
abstract = "Sentiment analysis is the computational task of extracting sentiment from a text document {--} for example whether it expresses a positive, negative or neutral opinion. Various approaches have been introduced in recent years, using a range of different techniques to extract sentiment information from a document. Measuring these methods against a gold standard dataset is a useful way to evaluate such systems. However, different sentiment analysis techniques represent sentiment values in different ways, such as discrete categorical classes or continuous numerical sentiment scores. This creates a challenge for evaluating and comparing such systems; in particular assessing numerical scores against datasets that use fixed classes is difficult, because the numerical outputs have to be mapped onto the ordered classes. This paper proposes a novel calibration technique that uses precision vs. recall curves to set class thresholds to optimize a continuous sentiment analyser{'}s performance against a discrete gold standard dataset. In experiments mapping a continuous score onto a three-class classification of movie reviews, we show that calibration results in a substantial increase in f-score when compared to a non-calibrated mapping.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="satthar-etal-2017-calibration">
<titleInfo>
<title>A Calibration Method for Evaluation of Sentiment Analysis</title>
</titleInfo>
<name type="personal">
<namePart type="given">F</namePart>
<namePart type="given">Sharmila</namePart>
<namePart type="family">Satthar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Roger</namePart>
<namePart type="family">Evans</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gulden</namePart>
<namePart type="family">Uchyigit</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the International Conference Recent Advances in Natural Language Processing, RANLP 2017</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Galia</namePart>
<namePart type="family">Angelova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd.</publisher>
<place>
<placeTerm type="text">Varna, Bulgaria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Sentiment analysis is the computational task of extracting sentiment from a text document – for example whether it expresses a positive, negative or neutral opinion. Various approaches have been introduced in recent years, using a range of different techniques to extract sentiment information from a document. Measuring these methods against a gold standard dataset is a useful way to evaluate such systems. However, different sentiment analysis techniques represent sentiment values in different ways, such as discrete categorical classes or continuous numerical sentiment scores. This creates a challenge for evaluating and comparing such systems; in particular assessing numerical scores against datasets that use fixed classes is difficult, because the numerical outputs have to be mapped onto the ordered classes. This paper proposes a novel calibration technique that uses precision vs. recall curves to set class thresholds to optimize a continuous sentiment analyser’s performance against a discrete gold standard dataset. In experiments mapping a continuous score onto a three-class classification of movie reviews, we show that calibration results in a substantial increase in f-score when compared to a non-calibrated mapping.</abstract>
<identifier type="citekey">satthar-etal-2017-calibration</identifier>
<identifier type="doi">10.26615/978-954-452-049-6_084</identifier>
<part>
<date>2017-09</date>
<extent unit="page">
<start>652</start>
<end>660</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Calibration Method for Evaluation of Sentiment Analysis
%A Satthar, F. Sharmila
%A Evans, Roger
%A Uchyigit, Gulden
%Y Mitkov, Ruslan
%Y Angelova, Galia
%S Proceedings of the International Conference Recent Advances in Natural Language Processing, RANLP 2017
%D 2017
%8 September
%I INCOMA Ltd.
%C Varna, Bulgaria
%F satthar-etal-2017-calibration
%X Sentiment analysis is the computational task of extracting sentiment from a text document – for example whether it expresses a positive, negative or neutral opinion. Various approaches have been introduced in recent years, using a range of different techniques to extract sentiment information from a document. Measuring these methods against a gold standard dataset is a useful way to evaluate such systems. However, different sentiment analysis techniques represent sentiment values in different ways, such as discrete categorical classes or continuous numerical sentiment scores. This creates a challenge for evaluating and comparing such systems; in particular assessing numerical scores against datasets that use fixed classes is difficult, because the numerical outputs have to be mapped onto the ordered classes. This paper proposes a novel calibration technique that uses precision vs. recall curves to set class thresholds to optimize a continuous sentiment analyser’s performance against a discrete gold standard dataset. In experiments mapping a continuous score onto a three-class classification of movie reviews, we show that calibration results in a substantial increase in f-score when compared to a non-calibrated mapping.
%R 10.26615/978-954-452-049-6_084
%U https://doi.org/10.26615/978-954-452-049-6_084
%P 652-660
Markdown (Informal)
[A Calibration Method for Evaluation of Sentiment Analysis](https://doi.org/10.26615/978-954-452-049-6_084) (Satthar et al., RANLP 2017)
ACL