% BibTeX record for the journal article with ACL Anthology ID 2025.jclib-1.2
% (Journal Computational Linguistics in Bulgaria, volume 1, pages 11--41).
% NOTE(review): the first author is stored with surname "Stoupak" and given
% names "Iglika Nikolova"; verify against the paper whether the surname is
% actually the compound "Nikolova-Stoupak" before relying on sorting/labels.
@article{stoupak-etal-2025-chatgpt,
  author    = {Stoupak, Iglika Nikolova and
               Lejeune, Ga{\"e}l and
               Schaeffer-Lacroix, Eva},
  title     = {Does {ChatGPT} Adapt Itself to the Language Used and the Audience It Implies?},
  editor    = {Koeva, Svetla},
  journal   = {Journal Computational Linguistics in Bulgaria},
  volume    = {1},
  pages     = {11--41},
  month     = jul,
  year      = {2025},
  address   = {Sofia, Bulgaria},
  publisher = {Institute for Bulgarian Language, Department of Computational Linguistics, Bulgarian Academy of Sciences},
  doi       = {10.47810/JCLIB.1.2025.02},
  url       = {https://aclanthology.org/2025.jclib-1.2/},
  abstract  = {This paper seeks to quantify and analyse the progress that ChatGPT has made from its GPT-3.5 (2022) to its GPT-4.5 (2025) version when it comes to answering prompts in a selection of differently-resourced languages: English, Bulgarian, Greek, French, Hebrew, Japanese and Russian. Factual correctness, textual quality and an answer{'}s linguistic and cultural independence from an English baseline are evaluated in the process. Each response is marked positively or negatively for each of the three metrics based on a set of defined criteria and careful human-based analysis. In addition, three categories of questions are experimented with: general (e.g. communication assistance or request for jokes), perception-related (e.g. creative writing or explanation of physical processes) and geography-/culture-sensitive (questions in a specific language that address a particular, slightly sensitive topic related to the implied audience e.g. `Why do French people eat snails?'). As hypothesised, the recent GPT-4.5 version demonstrates significant progress in all evaluated categories, thereby resolving past issues such as decreased textual quality of low-resourced languages and, notably, very limited variety in answers to the same question across languages. The metric `Independence from the (English) Baseline' receives 80.95{\%} of positive marks in the GPT-4.5 version as opposed to 26.19{\%} for GPT-3.5. Lingering problems include ChatGPT{'}s incomplete ability to generate relevant and culturally-sensitive jokes and poems.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for citekey stoupak-etal-2025-chatgpt: the article's own
     metadata, followed by its host journal (relatedItem type="host") and the
     volume/page placement inside that host (part). -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="stoupak-etal-2025-chatgpt">
    <titleInfo>
      <title>Does ChatGPT Adapt Itself to the Language Used and the Audience It Implies?</title>
    </titleInfo>
    <!-- Authors, in publication order. -->
    <name type="personal">
      <namePart type="given">Iglika</namePart>
      <namePart type="given">Nikolova</namePart>
      <namePart type="family">Stoupak</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Gaël</namePart>
      <namePart type="family">Lejeune</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Eva</namePart>
      <namePart type="family">Schaeffer-Lacroix</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <genre authority="bibutilsgt">journal article</genre>
    <!-- Host publication: the journal in which the article appears. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Journal Computational Linguistics in Bulgaria</title>
      </titleInfo>
      <originInfo>
        <issuance>continuing</issuance>
        <publisher>Institute for Bulgarian Language, Department of Computational Linguistics, Bulgarian Academy of Sciences</publisher>
        <place>
          <placeTerm type="text">Sofia, Bulgaria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">periodical</genre>
      <genre authority="bibutilsgt">academic journal</genre>
    </relatedItem>
    <abstract>This paper seeks to quantify and analyse the progress that ChatGPT has made from its GPT-3.5 (2022) to its GPT-4.5 (2025) version when it comes to answering prompts in a selection of differently-resourced languages: English, Bulgarian, Greek, French, Hebrew, Japanese and Russian. Factual correctness, textual quality and an answer’s linguistic and cultural independence from an English baseline are evaluated in the process. Each response is marked positively or negatively for each of the three metrics based on a set of defined criteria and careful human-based analysis. In addition, three categories of questions are experimented with: general (e.g. communication assistance or request for jokes), perception-related (e.g. creative writing or explanation of physical processes) and geography-/culture-sensitive (questions in a specific language that address a particular, slightly sensitive topic related to the implied audience e.g. ‘Why do French people eat snails?’). As hypothesised, the recent GPT-4.5 version demonstrates significant progress in all evaluated categories, thereby resolving past issues such as decreased textual quality of low-resourced languages and, notably, very limited variety in answers to the same question across languages. The metric ‘Independence from the (English) Baseline’ receives 80.95% of positive marks in the GPT-4.5 version as opposed to 26.19% for GPT-3.5. Lingering problems include ChatGPT’s incomplete ability to generate relevant and culturally-sensitive jokes and poems.</abstract>
    <identifier type="citekey">stoupak-etal-2025-chatgpt</identifier>
    <identifier type="doi">10.47810/JCLIB.1.2025.02</identifier>
    <location>
      <url>https://aclanthology.org/2025.jclib-1.2/</url>
    </location>
    <!-- Placement within the host: volume 1, pages 11 to 41. -->
    <part>
      <date>2025-07</date>
      <detail type="volume"><number>1</number></detail>
      <extent unit="page">
        <start>11</start>
        <end>41</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Journal Article
%T Does ChatGPT Adapt Itself to the Language Used and the Audience It Implies?
%A Stoupak, Iglika Nikolova
%A Lejeune, Gaël
%A Schaeffer-Lacroix, Eva
%J Journal Computational Linguistics in Bulgaria
%D 2025
%8 July
%V 1
%I Institute for Bulgarian Language, Department of Computational Linguistics, Bulgarian Academy of Sciences
%C Sofia, Bulgaria
%F stoupak-etal-2025-chatgpt
%X This paper seeks to quantify and analyse the progress that ChatGPT has made from its GPT-3.5 (2022) to its GPT-4.5 (2025) version when it comes to answering prompts in a selection of differently-resourced languages: English, Bulgarian, Greek, French, Hebrew, Japanese and Russian. Factual correctness, textual quality and an answer’s linguistic and cultural independence from an English baseline are evaluated in the process. Each response is marked positively or negatively for each of the three metrics based on a set of defined criteria and careful human-based analysis. In addition, three categories of questions are experimented with: general (e.g. communication assistance or request for jokes), perception-related (e.g. creative writing or explanation of physical processes) and geography-/culture-sensitive (questions in a specific language that address a particular, slightly sensitive topic related to the implied audience e.g. ‘Why do French people eat snails?’). As hypothesised, the recent GPT-4.5 version demonstrates significant progress in all evaluated categories, thereby resolving past issues such as decreased textual quality of low-resourced languages and, notably, very limited variety in answers to the same question across languages. The metric ‘Independence from the (English) Baseline’ receives 80.95% of positive marks in the GPT-4.5 version as opposed to 26.19% for GPT-3.5. Lingering problems include ChatGPT’s incomplete ability to generate relevant and culturally-sensitive jokes and poems.
%R 10.47810/JCLIB.1.2025.02
%U https://aclanthology.org/2025.jclib-1.2/
%U https://doi.org/10.47810/JCLIB.1.2025.02
%P 11-41
Markdown (Informal)
[Does ChatGPT Adapt Itself to the Language Used and the Audience It Implies?](https://aclanthology.org/2025.jclib-1.2/) (Stoupak et al., JCLIB 2025)
ACL