@inproceedings{mervaala-kousa-2024-order,
  title     = {Order Up! Micromanaging Inconsistencies in {ChatGPT}-4o Text Analyses},
  author    = {Mervaala, Erkki and
               Kousa, Ilona},
  editor    = {H{\"a}m{\"a}l{\"a}inen, Mika and
               {\"O}hman, Emily and
               Miyagawa, So and
               Alnajjar, Khalid and
               Bizzoni, Yuri},
  booktitle = {Proceedings of the 4th International Conference on Natural Language Processing for Digital Humanities},
  month     = nov,
  year      = {2024},
  address   = {Miami, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.nlp4dh-1.51},
  doi       = {10.18653/v1/2024.nlp4dh-1.51},
  pages     = {521--535},
  abstract  = {Large language model (LLM) applications have taken the world by storm in the past two years, and the academic sphere has not been an exception. One common, cumbersome task for researchers to attempt to automatise has been text annotation and, to an extent, analysis. Popular LLMs such as ChatGPT have been examined as a research assistant and as an analysis tool, and several discrepancies regarding both transparency and the generative content have been uncovered. Our research approaches the usability and trustworthiness of ChatGPT for text analysis from the point of view of an {``}out-of-the-box{''} zero-shot or few-shot setting, focusing on how the context window and mixed text types affect the analyses generated. Results from our testing indicate that both the types of the texts and the ordering of different kinds of texts do affect the ChatGPT analysis, but also that the context-building is less likely to cause analysis deterioration when analysing similar texts. Though some of these issues are at the core of how LLMs function, many of these caveats can be addressed by transparent research planning.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mervaala-kousa-2024-order">
<titleInfo>
<title>Order Up! Micromanaging Inconsistencies in ChatGPT-4o Text Analyses</title>
</titleInfo>
<name type="personal">
<namePart type="given">Erkki</namePart>
<namePart type="family">Mervaala</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ilona</namePart>
<namePart type="family">Kousa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 4th International Conference on Natural Language Processing for Digital Humanities</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mika</namePart>
<namePart type="family">Hämäläinen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Emily</namePart>
<namePart type="family">Öhman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">So</namePart>
<namePart type="family">Miyagawa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Alnajjar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuri</namePart>
<namePart type="family">Bizzoni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Large language model (LLM) applications have taken the world by storm in the past two years, and the academic sphere has not been an exception. One common, cumbersome task for researchers to attempt to automatise has been text annotation and, to an extent, analysis. Popular LLMs such as ChatGPT have been examined as a research assistant and as an analysis tool, and several discrepancies regarding both transparency and the generative content have been uncovered. Our research approaches the usability and trustworthiness of ChatGPT for text analysis from the point of view of an “out-of-the-box” zero-shot or few-shot setting, focusing on how the context window and mixed text types affect the analyses generated. Results from our testing indicate that both the types of the texts and the ordering of different kinds of texts do affect the ChatGPT analysis, but also that the context-building is less likely to cause analysis deterioration when analysing similar texts. Though some of these issues are at the core of how LLMs function, many of these caveats can be addressed by transparent research planning.</abstract>
<identifier type="citekey">mervaala-kousa-2024-order</identifier>
<identifier type="doi">10.18653/v1/2024.nlp4dh-1.51</identifier>
<location>
<url>https://aclanthology.org/2024.nlp4dh-1.51</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>521</start>
<end>535</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Order Up! Micromanaging Inconsistencies in ChatGPT-4o Text Analyses
%A Mervaala, Erkki
%A Kousa, Ilona
%Y Hämäläinen, Mika
%Y Öhman, Emily
%Y Miyagawa, So
%Y Alnajjar, Khalid
%Y Bizzoni, Yuri
%S Proceedings of the 4th International Conference on Natural Language Processing for Digital Humanities
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, USA
%F mervaala-kousa-2024-order
%X Large language model (LLM) applications have taken the world by storm in the past two years, and the academic sphere has not been an exception. One common, cumbersome task for researchers to attempt to automatise has been text annotation and, to an extent, analysis. Popular LLMs such as ChatGPT have been examined as a research assistant and as an analysis tool, and several discrepancies regarding both transparency and the generative content have been uncovered. Our research approaches the usability and trustworthiness of ChatGPT for text analysis from the point of view of an “out-of-the-box” zero-shot or few-shot setting, focusing on how the context window and mixed text types affect the analyses generated. Results from our testing indicate that both the types of the texts and the ordering of different kinds of texts do affect the ChatGPT analysis, but also that the context-building is less likely to cause analysis deterioration when analysing similar texts. Though some of these issues are at the core of how LLMs function, many of these caveats can be addressed by transparent research planning.
%R 10.18653/v1/2024.nlp4dh-1.51
%U https://aclanthology.org/2024.nlp4dh-1.51
%U https://doi.org/10.18653/v1/2024.nlp4dh-1.51
%P 521-535
Markdown (Informal)
[Order Up! Micromanaging Inconsistencies in ChatGPT-4o Text Analyses](https://aclanthology.org/2024.nlp4dh-1.51) (Mervaala & Kousa, NLP4DH 2024)
ACL