@inproceedings{fleisig-etal-2024-linguistic,
title = "Linguistic Bias in {C}hat{GPT}: Language Models Reinforce Dialect Discrimination",
author = "Fleisig, Eve and
Smith, Genevieve and
Bossi, Madeline and
Rustagi, Ishita and
Yin, Xavier and
Klein, Dan",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.emnlp-main.750",
doi = "10.18653/v1/2024.emnlp-main.750",
pages = "13541--13564",
abstract = "We present a large-scale study of linguistic bias exhibited by ChatGPT covering ten dialects of English (Standard American English, Standard British English, and eight widely spoken non-{''}standard{''} varieties from around the world). We prompted GPT-3.5 Turbo and GPT-4 with text by native speakers of each variety and analyzed the responses via detailed linguistic feature annotation and native speaker evaluation. We find that the models default to {``}standard{''} varieties of English; based on evaluation by native speakers, we also find that model responses to non-{''}standard{''} varieties consistently exhibit a range of issues: stereotyping (19{\%} worse than for {``}standard{''} varieties), demeaning content (25{\%} worse), lack of comprehension (9{\%} worse), and condescending responses (15{\%} worse). Moreover, if these models are asked to imitate the writing style of prompts in non-{''}standard{''} varieties, they produce text that exhibits lower comprehension of the input and is especially prone to stereotyping. GPT-4 improves on GPT-3.5 in terms of comprehension, warmth, and friendliness, but also exhibits a marked increase in stereotyping (+18{\%}). The results indicate that GPT-3.5 Turbo and GPT-4 can perpetuate linguistic discrimination toward speakers of non-{''}standard{''} varieties.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="fleisig-etal-2024-linguistic">
<titleInfo>
<title>Linguistic Bias in ChatGPT: Language Models Reinforce Dialect Discrimination</title>
</titleInfo>
<name type="personal">
<namePart type="given">Eve</namePart>
<namePart type="family">Fleisig</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Genevieve</namePart>
<namePart type="family">Smith</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Madeline</namePart>
<namePart type="family">Bossi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ishita</namePart>
<namePart type="family">Rustagi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xavier</namePart>
<namePart type="family">Yin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dan</namePart>
<namePart type="family">Klein</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaser</namePart>
<namePart type="family">Al-Onaizan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present a large-scale study of linguistic bias exhibited by ChatGPT covering ten dialects of English (Standard American English, Standard British English, and eight widely spoken non-“standard” varieties from around the world). We prompted GPT-3.5 Turbo and GPT-4 with text by native speakers of each variety and analyzed the responses via detailed linguistic feature annotation and native speaker evaluation. We find that the models default to “standard” varieties of English; based on evaluation by native speakers, we also find that model responses to non-“standard” varieties consistently exhibit a range of issues: stereotyping (19% worse than for “standard” varieties), demeaning content (25% worse), lack of comprehension (9% worse), and condescending responses (15% worse). Moreover, if these models are asked to imitate the writing style of prompts in non-“standard” varieties, they produce text that exhibits lower comprehension of the input and is especially prone to stereotyping. GPT-4 improves on GPT-3.5 in terms of comprehension, warmth, and friendliness, but also exhibits a marked increase in stereotyping (+18%). The results indicate that GPT-3.5 Turbo and GPT-4 can perpetuate linguistic discrimination toward speakers of non-“standard” varieties.</abstract>
<identifier type="citekey">fleisig-etal-2024-linguistic</identifier>
<identifier type="doi">10.18653/v1/2024.emnlp-main.750</identifier>
<location>
<url>https://aclanthology.org/2024.emnlp-main.750</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>13541</start>
<end>13564</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Linguistic Bias in ChatGPT: Language Models Reinforce Dialect Discrimination
%A Fleisig, Eve
%A Smith, Genevieve
%A Bossi, Madeline
%A Rustagi, Ishita
%A Yin, Xavier
%A Klein, Dan
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F fleisig-etal-2024-linguistic
%X We present a large-scale study of linguistic bias exhibited by ChatGPT covering ten dialects of English (Standard American English, Standard British English, and eight widely spoken non-“standard” varieties from around the world). We prompted GPT-3.5 Turbo and GPT-4 with text by native speakers of each variety and analyzed the responses via detailed linguistic feature annotation and native speaker evaluation. We find that the models default to “standard” varieties of English; based on evaluation by native speakers, we also find that model responses to non-“standard” varieties consistently exhibit a range of issues: stereotyping (19% worse than for “standard” varieties), demeaning content (25% worse), lack of comprehension (9% worse), and condescending responses (15% worse). Moreover, if these models are asked to imitate the writing style of prompts in non-“standard” varieties, they produce text that exhibits lower comprehension of the input and is especially prone to stereotyping. GPT-4 improves on GPT-3.5 in terms of comprehension, warmth, and friendliness, but also exhibits a marked increase in stereotyping (+18%). The results indicate that GPT-3.5 Turbo and GPT-4 can perpetuate linguistic discrimination toward speakers of non-“standard” varieties.
%R 10.18653/v1/2024.emnlp-main.750
%U https://aclanthology.org/2024.emnlp-main.750
%U https://doi.org/10.18653/v1/2024.emnlp-main.750
%P 13541-13564
Markdown (Informal)
[Linguistic Bias in ChatGPT: Language Models Reinforce Dialect Discrimination](https://aclanthology.org/2024.emnlp-main.750) (Fleisig et al., EMNLP 2024)
ACL
Eve Fleisig, Genevieve Smith, Madeline Bossi, Ishita Rustagi, Xavier Yin, and Dan Klein. 2024. Linguistic Bias in ChatGPT: Language Models Reinforce Dialect Discrimination. In Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing, pages 13541–13564, Miami, Florida, USA. Association for Computational Linguistics.