@article{rottger-etal-2026-issuebench,
title = "{I}ssue{B}ench: Millions of Realistic Prompts for Measuring Issue Bias in {LLM} Writing Assistance",
author = {R{\"o}ttger, Paul and
Hinck, Musashi and
Hofmann, Valentin and
Hackenburg, Kobi and
Pyatkin, Valentina and
Brahman, Faeze and
Hovy, Dirk},
journal = "Transactions of the Association for Computational Linguistics",
volume = "14",
year = "2026",
address = "Cambridge, MA",
publisher = "MIT Press",
url = "https://aclanthology.org/2026.tacl-1.16/",
doi = "10.1162/tacl.a.626",
pages = "318--340",
abstract = "Large language models (LLMs) are helping millions of users write texts about diverse issues, and in doing so expose users to different ideas and perspectives. This creates concerns about issue bias, where an LLM tends to present just one perspective on a given issue, which in turn may influence how users think about this issue. So far, it has not been possible to measure which issue biases LLMs manifest in real user interactions, making it difficult to address the risks from biased LLMs. Therefore, we create IssueBench: a set of 2.49m realistic English-language prompts to measure issue bias in LLM writing assistance, which we construct based on 3.9k templates (e.g., ``write a blog about'') and 212 political issues (e.g., ``AI regulation'') from real user interactions. Using IssueBench, we show that issue biases are common and persistent in 10 state-of-the-art LLMs. We also show that biases are very similar across models, and that all models align more with US Democrat than Republican voter opinion on a subset of issues. IssueBench can easily be adapted to include other issues, templates, or tasks. By enabling robust and realistic measurement, we hope that IssueBench can bring a new quality of evidence to ongoing discussions about LLM biases and how to address them."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="rottger-etal-2026-issuebench">
<titleInfo>
<title>IssueBench: Millions of Realistic Prompts for Measuring Issue Bias in LLM Writing Assistance</title>
</titleInfo>
<name type="personal">
<namePart type="given">Paul</namePart>
<namePart type="family">Röttger</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Musashi</namePart>
<namePart type="family">Hinck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Valentin</namePart>
<namePart type="family">Hofmann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kobi</namePart>
<namePart type="family">Hackenburg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Valentina</namePart>
<namePart type="family">Pyatkin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Faeze</namePart>
<namePart type="family">Brahman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dirk</namePart>
<namePart type="family">Hovy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<genre authority="bibutilsgt">journal article</genre>
<relatedItem type="host">
<titleInfo>
<title>Transactions of the Association for Computational Linguistics</title>
</titleInfo>
<originInfo>
<issuance>continuing</issuance>
<publisher>MIT Press</publisher>
<place>
<placeTerm type="text">Cambridge, MA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">periodical</genre>
<genre authority="bibutilsgt">academic journal</genre>
</relatedItem>
<abstract>Large language models (LLMs) are helping millions of users write texts about diverse issues, and in doing so expose users to different ideas and perspectives. This creates concerns about issue bias, where an LLM tends to present just one perspective on a given issue, which in turn may influence how users think about this issue. So far, it has not been possible to measure which issue biases LLMs manifest in real user interactions, making it difficult to address the risks from biased LLMs. Therefore, we create IssueBench: a set of 2.49m realistic English-language prompts to measure issue bias in LLM writing assistance, which we construct based on 3.9k templates (e.g., “write a blog about”) and 212 political issues (e.g., “AI regulation”) from real user interactions. Using IssueBench, we show that issue biases are common and persistent in 10 state-of-the-art LLMs. We also show that biases are very similar across models, and that all models align more with US Democrat than Republican voter opinion on a subset of issues. IssueBench can easily be adapted to include other issues, templates, or tasks. By enabling robust and realistic measurement, we hope that IssueBench can bring a new quality of evidence to ongoing discussions about LLM biases and how to address them.</abstract>
<identifier type="citekey">rottger-etal-2026-issuebench</identifier>
<identifier type="doi">10.1162/tacl.a.626</identifier>
<location>
<url>https://aclanthology.org/2026.tacl-1.16/</url>
</location>
<part>
<date>2026</date>
<detail type="volume"><number>14</number></detail>
<extent unit="page">
<start>318</start>
<end>340</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Journal Article
%T IssueBench: Millions of Realistic Prompts for Measuring Issue Bias in LLM Writing Assistance
%A Röttger, Paul
%A Hinck, Musashi
%A Hofmann, Valentin
%A Hackenburg, Kobi
%A Pyatkin, Valentina
%A Brahman, Faeze
%A Hovy, Dirk
%J Transactions of the Association for Computational Linguistics
%D 2026
%V 14
%I MIT Press
%C Cambridge, MA
%F rottger-etal-2026-issuebench
%X Large language models (LLMs) are helping millions of users write texts about diverse issues, and in doing so expose users to different ideas and perspectives. This creates concerns about issue bias, where an LLM tends to present just one perspective on a given issue, which in turn may influence how users think about this issue. So far, it has not been possible to measure which issue biases LLMs manifest in real user interactions, making it difficult to address the risks from biased LLMs. Therefore, we create IssueBench: a set of 2.49m realistic English-language prompts to measure issue bias in LLM writing assistance, which we construct based on 3.9k templates (e.g., “write a blog about”) and 212 political issues (e.g., “AI regulation”) from real user interactions. Using IssueBench, we show that issue biases are common and persistent in 10 state-of-the-art LLMs. We also show that biases are very similar across models, and that all models align more with US Democrat than Republican voter opinion on a subset of issues. IssueBench can easily be adapted to include other issues, templates, or tasks. By enabling robust and realistic measurement, we hope that IssueBench can bring a new quality of evidence to ongoing discussions about LLM biases and how to address them.
%R 10.1162/tacl.a.626
%U https://aclanthology.org/2026.tacl-1.16/
%U https://doi.org/10.1162/tacl.a.626
%P 318-340
Markdown (Informal)
[IssueBench: Millions of Realistic Prompts for Measuring Issue Bias in LLM Writing Assistance](https://aclanthology.org/2026.tacl-1.16/) (Röttger et al., TACL 2026)
ACL