@inproceedings{krusic-2024-constructing,
title = "Constructing a Sentiment-Annotated Corpus of {A}ustrian Historical Newspapers: Challenges, Tools, and Annotator Experience",
author = "Krusic, Lucija",
editor = {H{\"a}m{\"a}l{\"a}inen, Mika and
{\"O}hman, Emily and
Miyagawa, So and
Alnajjar, Khalid and
Bizzoni, Yuri},
booktitle = "Proceedings of the 4th International Conference on Natural Language Processing for Digital Humanities",
month = nov,
year = "2024",
address = "Miami, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.nlp4dh-1.6",
pages = "51--62",
abstract = "This study presents the development of a sentiment-annotated corpus of historical newspaper texts in Austrian German, addressing a gap in annotated corpora for Natural Language Processing in the field of Digital Humanities. Three annotators categorised 1005 sentences from two 19th-century periodicals into four sentiment categories: positive, negative, neutral, and mixed. The annotators, Masters and PhD students in Linguistics and Digital Humanities, are considered semi-experts and have received substantial training during this annotation study. Three tools were used and compared in the annotation process: Google Sheets, Google Forms and Doccano, and resulted in a gold standard corpus. The analysis revealed a fair to moderate inter-rater agreement (Fleiss{'} kappa = 0.405) and an average percentage agreement of 45.7{\%} for full consensus and 92.5{\%} for majority vote. As majority vote is needed for the creation of a gold standard corpus, these results are considered sufficient, and the annotations reliable. The study also introduced comprehensive guidelines for sentiment annotation, which were essential to overcome the challenges posed by historical language and context. The annotators{'} experience was assessed through a combination of standardised usability tests (NASA-TLX and UEQ-S) and a detailed custom-made user experience questionnaire, which provided qualitative insights into the difficulties and usability of the tools used. The questionnaire is an additional resource that can be used to assess usability and user experience assessments in future annotation studies. The findings demonstrate the effectiveness of semi-expert annotators and dedicated tools in producing reliable annotations and provide valuable resources, including the annotated corpus, guidelines, and a user experience questionnaire, for future sentiment analysis and annotation of Austrian historical texts. The sentiment-annotated corpus will be used as the gold standard for fine-tuning and evaluating machine learning models for sentiment analysis of Austrian historical newspapers with the topic of migration and minorities in a subsequent study.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="krusic-2024-constructing">
<titleInfo>
<title>Constructing a Sentiment-Annotated Corpus of Austrian Historical Newspapers: Challenges, Tools, and Annotator Experience</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lucija</namePart>
<namePart type="family">Krusic</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 4th International Conference on Natural Language Processing for Digital Humanities</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mika</namePart>
<namePart type="family">Hämäläinen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Emily</namePart>
<namePart type="family">Öhman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">So</namePart>
<namePart type="family">Miyagawa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Alnajjar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuri</namePart>
<namePart type="family">Bizzoni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This study presents the development of a sentiment-annotated corpus of historical newspaper texts in Austrian German, addressing a gap in annotated corpora for Natural Language Processing in the field of Digital Humanities. Three annotators categorised 1005 sentences from two 19th-century periodicals into four sentiment categories: positive, negative, neutral, and mixed. The annotators, Masters and PhD students in Linguistics and Digital Humanities, are considered semi-experts and have received substantial training during this annotation study. Three tools were used and compared in the annotation process: Google Sheets, Google Forms and Doccano, and resulted in a gold standard corpus. The analysis revealed a fair to moderate inter-rater agreement (Fleiss’ kappa = 0.405) and an average percentage agreement of 45.7% for full consensus and 92.5% for majority vote. As majority vote is needed for the creation of a gold standard corpus, these results are considered sufficient, and the annotations reliable. The study also introduced comprehensive guidelines for sentiment annotation, which were essential to overcome the challenges posed by historical language and context. The annotators’ experience was assessed through a combination of standardised usability tests (NASA-TLX and UEQ-S) and a detailed custom-made user experience questionnaire, which provided qualitative insights into the difficulties and usability of the tools used. The questionnaire is an additional resource that can be used to assess usability and user experience assessments in future annotation studies. The findings demonstrate the effectiveness of semi-expert annotators and dedicated tools in producing reliable annotations and provide valuable resources, including the annotated corpus, guidelines, and a user experience questionnaire, for future sentiment analysis and annotation of Austrian historical texts. The sentiment-annotated corpus will be used as the gold standard for fine-tuning and evaluating machine learning models for sentiment analysis of Austrian historical newspapers with the topic of migration and minorities in a subsequent study.</abstract>
<identifier type="citekey">krusic-2024-constructing</identifier>
<location>
<url>https://aclanthology.org/2024.nlp4dh-1.6</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>51</start>
<end>62</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Constructing a Sentiment-Annotated Corpus of Austrian Historical Newspapers: Challenges, Tools, and Annotator Experience
%A Krusic, Lucija
%Y Hämäläinen, Mika
%Y Öhman, Emily
%Y Miyagawa, So
%Y Alnajjar, Khalid
%Y Bizzoni, Yuri
%S Proceedings of the 4th International Conference on Natural Language Processing for Digital Humanities
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, USA
%F krusic-2024-constructing
%X This study presents the development of a sentiment-annotated corpus of historical newspaper texts in Austrian German, addressing a gap in annotated corpora for Natural Language Processing in the field of Digital Humanities. Three annotators categorised 1005 sentences from two 19th-century periodicals into four sentiment categories: positive, negative, neutral, and mixed. The annotators, Masters and PhD students in Linguistics and Digital Humanities, are considered semi-experts and have received substantial training during this annotation study. Three tools were used and compared in the annotation process: Google Sheets, Google Forms and Doccano, and resulted in a gold standard corpus. The analysis revealed a fair to moderate inter-rater agreement (Fleiss’ kappa = 0.405) and an average percentage agreement of 45.7% for full consensus and 92.5% for majority vote. As majority vote is needed for the creation of a gold standard corpus, these results are considered sufficient, and the annotations reliable. The study also introduced comprehensive guidelines for sentiment annotation, which were essential to overcome the challenges posed by historical language and context. The annotators’ experience was assessed through a combination of standardised usability tests (NASA-TLX and UEQ-S) and a detailed custom-made user experience questionnaire, which provided qualitative insights into the difficulties and usability of the tools used. The questionnaire is an additional resource that can be used to assess usability and user experience assessments in future annotation studies. The findings demonstrate the effectiveness of semi-expert annotators and dedicated tools in producing reliable annotations and provide valuable resources, including the annotated corpus, guidelines, and a user experience questionnaire, for future sentiment analysis and annotation of Austrian historical texts. The sentiment-annotated corpus will be used as the gold standard for fine-tuning and evaluating machine learning models for sentiment analysis of Austrian historical newspapers with the topic of migration and minorities in a subsequent study.
%U https://aclanthology.org/2024.nlp4dh-1.6
%P 51-62
Markdown (Informal)
[Constructing a Sentiment-Annotated Corpus of Austrian Historical Newspapers: Challenges, Tools, and Annotator Experience](https://aclanthology.org/2024.nlp4dh-1.6) (Krusic, NLP4DH 2024)
ACL