@inproceedings{maynard-bontcheva-2016-challenges,
    title = "Challenges of Evaluating Sentiment Analysis Tools on Social Media",
    author = "Maynard, Diana  and
      Bontcheva, Kalina",
    editor = "Calzolari, Nicoletta  and
      Choukri, Khalid  and
      Declerck, Thierry  and
      Goggi, Sara  and
      Grobelnik, Marko  and
      Maegaard, Bente  and
      Mariani, Joseph  and
      Mazo, Helene  and
      Moreno, Asuncion  and
      Odijk, Jan  and
      Piperidis, Stelios",
    booktitle = "Proceedings of the Tenth International Conference on Language Resources and Evaluation ({LREC}'16)",
    month = may,
    year = "2016",
    address = "Portoro{\v{z}}, Slovenia",
    publisher = "European Language Resources Association (ELRA)",
    url = "https://aclanthology.org/L16-1182/",
    pages = "1142--1148",
    abstract = "This paper discusses the challenges in carrying out fair comparative evaluations of sentiment analysis systems. Firstly, these are due to differences in corpus annotation guidelines and sentiment class distribution. Secondly, different systems often make different assumptions about how to interpret certain statements, e.g. tweets with URLs. In order to study the impact of these on evaluation results, this paper focuses on tweet sentiment analysis in particular. One existing and two newly created corpora are used, and the performance of four different sentiment analysis systems is reported; we make our annotated datasets and sentiment analysis applications publicly available. We see considerable variations in results across the different corpora, which calls into question the validity of many existing annotated datasets and evaluations, and we make some observations about both the systems and the datasets as a result."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="maynard-bontcheva-2016-challenges">
    <titleInfo>
        <title>Challenges of Evaluating Sentiment Analysis Tools on Social Media</title>
    </titleInfo>
    <name type="personal">
        <namePart type="given">Diana</namePart>
        <namePart type="family">Maynard</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Kalina</namePart>
        <namePart type="family">Bontcheva</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <originInfo>
        <dateIssued>2016-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
        <titleInfo>
            <title>Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC’16)</title>
        </titleInfo>
        <name type="personal">
            <namePart type="given">Nicoletta</namePart>
            <namePart type="family">Calzolari</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Khalid</namePart>
            <namePart type="family">Choukri</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Thierry</namePart>
            <namePart type="family">Declerck</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Sara</namePart>
            <namePart type="family">Goggi</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Marko</namePart>
            <namePart type="family">Grobelnik</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Bente</namePart>
            <namePart type="family">Maegaard</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Joseph</namePart>
            <namePart type="family">Mariani</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Helene</namePart>
            <namePart type="family">Mazo</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Asuncion</namePart>
            <namePart type="family">Moreno</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Jan</namePart>
            <namePart type="family">Odijk</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Stelios</namePart>
            <namePart type="family">Piperidis</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <originInfo>
            <publisher>European Language Resources Association (ELRA)</publisher>
            <place>
                <placeTerm type="text">Portorož, Slovenia</placeTerm>
            </place>
        </originInfo>
        <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>This paper discusses the challenges in carrying out fair comparative evaluations of sentiment analysis systems. Firstly, these are due to differences in corpus annotation guidelines and sentiment class distribution. Secondly, different systems often make different assumptions about how to interpret certain statements, e.g. tweets with URLs. In order to study the impact of these on evaluation results, this paper focuses on tweet sentiment analysis in particular. One existing and two newly created corpora are used, and the performance of four different sentiment analysis systems is reported; we make our annotated datasets and sentiment analysis applications publicly available. We see considerable variations in results across the different corpora, which calls into question the validity of many existing annotated datasets and evaluations, and we make some observations about both the systems and the datasets as a result.</abstract>
    <identifier type="citekey">maynard-bontcheva-2016-challenges</identifier>
    <location>
        <url>https://aclanthology.org/L16-1182/</url>
    </location>
    <part>
        <date>2016-05</date>
        <extent unit="page">
            <start>1142</start>
            <end>1148</end>
        </extent>
    </part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Challenges of Evaluating Sentiment Analysis Tools on Social Media
%A Maynard, Diana
%A Bontcheva, Kalina
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Declerck, Thierry
%Y Goggi, Sara
%Y Grobelnik, Marko
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Mazo, Helene
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC’16)
%D 2016
%8 May
%I European Language Resources Association (ELRA)
%C Portorož, Slovenia
%F maynard-bontcheva-2016-challenges
%X This paper discusses the challenges in carrying out fair comparative evaluations of sentiment analysis systems. Firstly, these are due to differences in corpus annotation guidelines and sentiment class distribution. Secondly, different systems often make different assumptions about how to interpret certain statements, e.g. tweets with URLs. In order to study the impact of these on evaluation results, this paper focuses on tweet sentiment analysis in particular. One existing and two newly created corpora are used, and the performance of four different sentiment analysis systems is reported; we make our annotated datasets and sentiment analysis applications publicly available. We see considerable variations in results across the different corpora, which calls into question the validity of many existing annotated datasets and evaluations, and we make some observations about both the systems and the datasets as a result.
%U https://aclanthology.org/L16-1182/
%P 1142-1148
Markdown (Informal)
[Challenges of Evaluating Sentiment Analysis Tools on Social Media](https://aclanthology.org/L16-1182/) (Maynard & Bontcheva, LREC 2016)
ACL