@inproceedings{patel-pavlick-2021-stated,
title = "{``}Was it {``}stated{''} or was it {``}claimed{''}?: How linguistic bias affects generative language models",
author = "Patel, Roma and
Pavlick, Ellie",
editor = "Moens, Marie-Francine and
Huang, Xuanjing and
Specia, Lucia and
Yih, Scott Wen-tau",
booktitle = "Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2021",
address = "Online and Punta Cana, Dominican Republic",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.emnlp-main.790",
doi = "10.18653/v1/2021.emnlp-main.790",
pages = "10080--10095",
abstract = "People use language in subtle and nuanced ways to convey their beliefs. For instance, saying \textit{claimed} instead of \textit{said} casts doubt on the truthfulness of the underlying proposition, thus representing the author{'}s opinion on the matter. Several works have identified such linguistic classes of words that occur frequently in natural language text and are bias-inducing by virtue of their framing effects. In this paper, we test whether generative language models (including GPT-2 (CITATION) are sensitive to these linguistic framing effects. In particular, we test whether prompts that contain linguistic markers of author bias (e.g., hedges, implicatives, subjective intensifiers, assertives) influence the distribution of the generated text. Although these framing effects are subtle and stylistic, we find evidence that they lead to measurable style and topic differences in the generated text, leading to language that is, on average, more polarised and more skewed towards controversial entities and events.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="patel-pavlick-2021-stated">
<titleInfo>
<title>“Was it “stated” or was it “claimed”?: How linguistic bias affects generative language models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Roma</namePart>
<namePart type="family">Patel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ellie</namePart>
<namePart type="family">Pavlick</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marie-Francine</namePart>
<namePart type="family">Moens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xuanjing</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lucia</namePart>
<namePart type="family">Specia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Scott</namePart>
<namePart type="given">Wen-tau</namePart>
<namePart type="family">Yih</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online and Punta Cana, Dominican Republic</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>People use language in subtle and nuanced ways to convey their beliefs. For instance, saying claimed instead of said casts doubt on the truthfulness of the underlying proposition, thus representing the author’s opinion on the matter. Several works have identified such linguistic classes of words that occur frequently in natural language text and are bias-inducing by virtue of their framing effects. In this paper, we test whether generative language models (including GPT-2 (CITATION) are sensitive to these linguistic framing effects. In particular, we test whether prompts that contain linguistic markers of author bias (e.g., hedges, implicatives, subjective intensifiers, assertives) influence the distribution of the generated text. Although these framing effects are subtle and stylistic, we find evidence that they lead to measurable style and topic differences in the generated text, leading to language that is, on average, more polarised and more skewed towards controversial entities and events.</abstract>
<identifier type="citekey">patel-pavlick-2021-stated</identifier>
<identifier type="doi">10.18653/v1/2021.emnlp-main.790</identifier>
<location>
<url>https://aclanthology.org/2021.emnlp-main.790</url>
</location>
<part>
<date>2021-11</date>
<extent unit="page">
<start>10080</start>
<end>10095</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T “Was it “stated” or was it “claimed”?: How linguistic bias affects generative language models
%A Patel, Roma
%A Pavlick, Ellie
%Y Moens, Marie-Francine
%Y Huang, Xuanjing
%Y Specia, Lucia
%Y Yih, Scott Wen-tau
%S Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing
%D 2021
%8 November
%I Association for Computational Linguistics
%C Online and Punta Cana, Dominican Republic
%F patel-pavlick-2021-stated
%X People use language in subtle and nuanced ways to convey their beliefs. For instance, saying claimed instead of said casts doubt on the truthfulness of the underlying proposition, thus representing the author’s opinion on the matter. Several works have identified such linguistic classes of words that occur frequently in natural language text and are bias-inducing by virtue of their framing effects. In this paper, we test whether generative language models (including GPT-2 (CITATION) are sensitive to these linguistic framing effects. In particular, we test whether prompts that contain linguistic markers of author bias (e.g., hedges, implicatives, subjective intensifiers, assertives) influence the distribution of the generated text. Although these framing effects are subtle and stylistic, we find evidence that they lead to measurable style and topic differences in the generated text, leading to language that is, on average, more polarised and more skewed towards controversial entities and events.
%R 10.18653/v1/2021.emnlp-main.790
%U https://aclanthology.org/2021.emnlp-main.790
%U https://doi.org/10.18653/v1/2021.emnlp-main.790
%P 10080-10095
Markdown (Informal)
[“Was it “stated” or was it “claimed”?: How linguistic bias affects generative language models](https://aclanthology.org/2021.emnlp-main.790) (Patel & Pavlick, EMNLP 2021)
ACL