% Workshop paper record; the url field points to the ACL Anthology landing page.
% Case protection in the title braces whole words ({ChatGPT}, {Slovak}) rather
% than single letters, so sentence-casing styles keep them intact without
% splitting the word into separate brace groups.
@inproceedings{hamerlik-etal-2024-chatgpt,
    title     = {{ChatGPT} as Your n-th Annotator: Experiments in Leveraging Large Language Models for Social Science Text Annotation in {Slovak} Language},
    author    = {Hamerlik, Endre and
                 {\v{S}}uppa, Marek and
                 Bl{\v{s}}t{\'a}k, Miroslav and
                 Kub{\'\i}k, Jozef and
                 Tak{\'a}{\v{c}}, Martin and
                 {\v{S}}imko, Mari{\'a}n and
                 Findor, Andrej},
    editor    = {Klamm, Christopher and
                 Lapesa, Gabriella and
                 Ponzetto, Simone Paolo and
                 Rehbein, Ines and
                 Sen, Indira},
    booktitle = {Proceedings of the 4th Workshop on Computational Linguistics for the Political and Social Sciences: Long and short papers},
    month     = sep,
    year      = {2024},
    address   = {Vienna, Austria},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2024.cpss-1.6},
    pages     = {81--89},
    abstract  = {Large Language Models (LLMs) are increasingly influential in Computational Social Science, offering new methods for processing and analyzing data, particularly in lower-resource language contexts. This study explores the use of OpenAI{'}s GPT-3.5 Turbo and GPT-4 for automating annotations for a unique news media dataset in a lower resourced language, focusing on stance classification tasks. Our results reveal that prompting in the native language, explanation generation, and advanced prompting strategies like Retrieval Augmented Generation and Chain of Thought prompting enhance LLM performance, particularly noting GPT-4{'}s superiority in predicting stance. Further evaluation indicates that LLMs can serve as a useful tool for social science text annotation in lower resourced languages, notably in identifying inconsistencies in annotation guidelines and annotated datasets.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single bibliographic record. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="hamerlik-etal-2024-chatgpt">
    <!-- Title of the paper itself -->
    <titleInfo>
      <title>ChatGPT as Your n-th Annotator: Experiments in Leveraging Large Language Models for Social Science Text Annotation in Slovak Language</title>
    </titleInfo>

    <!-- Authors, one name element each, in the order listed in the record -->
    <name type="personal">
      <namePart type="given">Endre</namePart>
      <namePart type="family">Hamerlik</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Marek</namePart>
      <namePart type="family">Šuppa</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Miroslav</namePart>
      <namePart type="family">Blšták</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jozef</namePart>
      <namePart type="family">Kubík</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Martin</namePart>
      <namePart type="family">Takáč</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Marián</namePart>
      <namePart type="family">Šimko</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Andrej</namePart>
      <namePart type="family">Findor</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>

    <originInfo>
      <dateIssued>2024-09</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume, its editors and publication details -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 4th Workshop on Computational Linguistics for the Political and Social Sciences: Long and short papers</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Christopher</namePart>
        <namePart type="family">Klamm</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Gabriella</namePart>
        <namePart type="family">Lapesa</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Simone</namePart>
        <namePart type="given">Paolo</namePart>
        <namePart type="family">Ponzetto</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ines</namePart>
        <namePart type="family">Rehbein</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Indira</namePart>
        <namePart type="family">Sen</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Vienna, Austria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>

    <abstract>Large Language Models (LLMs) are increasingly influential in Computational Social Science, offering new methods for processing and analyzing data, particularly in lower-resource language contexts. This study explores the use of OpenAI’s GPT-3.5 Turbo and GPT-4 for automating annotations for a unique news media dataset in a lower resourced language, focusing on stance classification tasks. Our results reveal that prompting in the native language, explanation generation, and advanced prompting strategies like Retrieval Augmented Generation and Chain of Thought prompting enhance LLM performance, particularly noting GPT-4’s superiority in predicting stance. Further evaluation indicates that LLMs can serve as a useful tool for social science text annotation in lower resourced languages, notably in identifying inconsistencies in annotation guidelines and annotated datasets.</abstract>

    <!-- Citation key and landing-page URL for this record -->
    <identifier type="citekey">hamerlik-etal-2024-chatgpt</identifier>
    <location>
      <url>https://aclanthology.org/2024.cpss-1.6</url>
    </location>

    <!-- Issue date and page range of the paper -->
    <part>
      <date>2024-09</date>
      <extent unit="page">
        <start>81</start>
        <end>89</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T ChatGPT as Your n-th Annotator: Experiments in Leveraging Large Language Models for Social Science Text Annotation in Slovak Language
%A Hamerlik, Endre
%A Šuppa, Marek
%A Blšták, Miroslav
%A Kubík, Jozef
%A Takáč, Martin
%A Šimko, Marián
%A Findor, Andrej
%Y Klamm, Christopher
%Y Lapesa, Gabriella
%Y Ponzetto, Simone Paolo
%Y Rehbein, Ines
%Y Sen, Indira
%S Proceedings of the 4th Workshop on Computational Linguistics for the Political and Social Sciences: Long and short papers
%D 2024
%8 September
%I Association for Computational Linguistics
%C Vienna, Austria
%F hamerlik-etal-2024-chatgpt
%X Large Language Models (LLMs) are increasingly influential in Computational Social Science, offering new methods for processing and analyzing data, particularly in lower-resource language contexts. This study explores the use of OpenAI’s GPT-3.5 Turbo and GPT-4 for automating annotations for a unique news media dataset in a lower resourced language, focusing on stance classification tasks. Our results reveal that prompting in the native language, explanation generation, and advanced prompting strategies like Retrieval Augmented Generation and Chain of Thought prompting enhance LLM performance, particularly noting GPT-4’s superiority in predicting stance. Further evaluation indicates that LLMs can serve as a useful tool for social science text annotation in lower resourced languages, notably in identifying inconsistencies in annotation guidelines and annotated datasets.
%U https://aclanthology.org/2024.cpss-1.6
%P 81-89
Markdown (Informal)
[ChatGPT as Your n-th Annotator: Experiments in Leveraging Large Language Models for Social Science Text Annotation in Slovak Language](https://aclanthology.org/2024.cpss-1.6) (Hamerlik et al., cpss-WS 2024)
ACL