@inproceedings{giovanni-moller-maria-aiello-2025-prompt,
title = "Prompt Refinement or Fine-tuning? Best Practices for using {LLM}s in Computational Social Science Tasks",
author = "Giovanni M{\o}ller, Anders and
Maria Aiello, Luca",
editor = "Hale, James and
Kwon, Brian Deuksin and
Dutt, Ritam",
booktitle = "Proceedings of the Third Workshop on Social Influence in Conversations (SICon 2025)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.sicon-1.2/",
doi = "10.18653/v1/2025.sicon-1.2",
pages = "27--49",
ISBN = "979-8-89176-266-4",
abstract = "Large Language Models are expressive tools that enable complex tasks of text understanding within Computational Social Science. Their versatility, while beneficial, poses a barrier for establishing standardized best practices within the field. To bring clarity on the values of different strategies, we present an overview of the performance of modern LLM-based classification methods on a benchmark of 23 social knowledge tasks. Our results point to three best practices: prioritize models with larger vocabulary and pre-training corpora; avoid simple zero-shot in favor of AI-enhanced prompting; fine-tune on task-specific data, and consider more complex forms of instruction-tuning on multiple datasets only when training data is more abundant."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="giovanni-moller-maria-aiello-2025-prompt">
<titleInfo>
<title>Prompt Refinement or Fine-tuning? Best Practices for using LLMs in Computational Social Science Tasks</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anders</namePart>
<namePart type="family">Giovanni Møller</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luca</namePart>
<namePart type="family">Maria Aiello</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Third Workshop on Social Influence in Conversations (SICon 2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">James</namePart>
<namePart type="family">Hale</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Brian</namePart>
<namePart type="given">Deuksin</namePart>
<namePart type="family">Kwon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ritam</namePart>
<namePart type="family">Dutt</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-266-4</identifier>
</relatedItem>
<abstract>Large Language Models are expressive tools that enable complex tasks of text understanding within Computational Social Science. Their versatility, while beneficial, poses a barrier for establishing standardized best practices within the field. To bring clarity on the values of different strategies, we present an overview of the performance of modern LLM-based classification methods on a benchmark of 23 social knowledge tasks. Our results point to three best practices: prioritize models with larger vocabulary and pre-training corpora; avoid simple zero-shot in favor of AI-enhanced prompting; fine-tune on task-specific data, and consider more complex forms of instruction-tuning on multiple datasets only when training data is more abundant.</abstract>
<identifier type="citekey">giovanni-moller-maria-aiello-2025-prompt</identifier>
<identifier type="doi">10.18653/v1/2025.sicon-1.2</identifier>
<location>
<url>https://aclanthology.org/2025.sicon-1.2/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>27</start>
<end>49</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Prompt Refinement or Fine-tuning? Best Practices for using LLMs in Computational Social Science Tasks
%A Giovanni Møller, Anders
%A Maria Aiello, Luca
%Y Hale, James
%Y Kwon, Brian Deuksin
%Y Dutt, Ritam
%S Proceedings of the Third Workshop on Social Influence in Conversations (SICon 2025)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-266-4
%F giovanni-moller-maria-aiello-2025-prompt
%X Large Language Models are expressive tools that enable complex tasks of text understanding within Computational Social Science. Their versatility, while beneficial, poses a barrier for establishing standardized best practices within the field. To bring clarity on the values of different strategies, we present an overview of the performance of modern LLM-based classification methods on a benchmark of 23 social knowledge tasks. Our results point to three best practices: prioritize models with larger vocabulary and pre-training corpora; avoid simple zero-shot in favor of AI-enhanced prompting; fine-tune on task-specific data, and consider more complex forms of instruction-tuning on multiple datasets only when training data is more abundant.
%R 10.18653/v1/2025.sicon-1.2
%U https://aclanthology.org/2025.sicon-1.2/
%U https://doi.org/10.18653/v1/2025.sicon-1.2
%P 27-49
Markdown (Informal)
[Prompt Refinement or Fine-tuning? Best Practices for using LLMs in Computational Social Science Tasks](https://aclanthology.org/2025.sicon-1.2/) (Giovanni Møller & Maria Aiello, SICon 2025)
ACL