@inproceedings{pastorino-moosavi-2026-frame,
title = "Frame In, Frame Out: Measuring Framing Bias in {LLM}-Generated News Summaries",
author = "Pastorino, Valeria and
Moosavi, Nafise Sadat",
editor = "Mohammad, Saif M. and
Ousidhoum, Nedjma",
booktitle = "Proceedings of the 15th Joint Conference on Lexical and Computational Semantics (*{SEM} 2026)",
month = jul,
year = "2026",
address = "San Diego, California, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.starsem-conference.25/",
pages = "378--384",
ISBN = "979-8-89176-413-2",
abstract = "News headlines and summaries shape how events are interpreted through selective emphasis and omission, a phenomenon commonly referred to as framing. Large language models are now routinely used to generate such content, yet existing evaluation frameworks largely overlook this dimension. We introduce Frame In, Frame Out (FIFO), the first large-scale benchmark for measuring framing presence in LLM-generated news summaries, grounded in the widely used XSum dataset. FIFO combines 15,499 jury-annotated examples with 320 expert-labeled instances ($\kappa = 0.61$) to validate and calibrate model-based annotations. Using FIFO, we analyze measured framing rates across 27 summarization models. We find that LLM-generated summaries often exhibit higher calibrated framing rates than human-written references, with substantial variation across topics and training regimes, including elevated rates in scientific and public health summaries. Our results establish framing as an underexplored and consequential dimension of summarization quality."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="pastorino-moosavi-2026-frame">
<titleInfo>
<title>Frame In, Frame Out: Measuring Framing Bias in LLM-Generated News Summaries</title>
</titleInfo>
<name type="personal">
<namePart type="given">Valeria</namePart>
<namePart type="family">Pastorino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nafise</namePart>
<namePart type="given">Sadat</namePart>
<namePart type="family">Moosavi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 15th Joint Conference on Lexical and Computational Semantics (*SEM 2026)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Saif</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Mohammad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nedjma</namePart>
<namePart type="family">Ousidhoum</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-413-2</identifier>
</relatedItem>
<abstract>News headlines and summaries shape how events are interpreted through selective emphasis and omission, a phenomenon commonly referred to as framing. Large language models are now routinely used to generate such content, yet existing evaluation frameworks largely overlook this dimension. We introduce Frame In, Frame Out (FIFO), the first large-scale benchmark for measuring framing presence in LLM-generated news summaries, grounded in the widely used XSum dataset. FIFO combines 15,499 jury-annotated examples with 320 expert-labeled instances (ąppa = 0.61) to validate and calibrate model-based annotations. Using FIFO, we analyze measured framing rates across 27 summarization models. We find that LLM-generated summaries often exhibit higher calibrated framing rates than human-written references, with substantial variation across topics and training regimes, including elevated rates in scientific and public health summaries. Our results establish framing as an underexplored and consequential dimension of summarization quality.</abstract>
<identifier type="citekey">pastorino-moosavi-2026-frame</identifier>
<location>
<url>https://aclanthology.org/2026.starsem-conference.25/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>378</start>
<end>384</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Frame In, Frame Out: Measuring Framing Bias in LLM-Generated News Summaries
%A Pastorino, Valeria
%A Moosavi, Nafise Sadat
%Y Mohammad, Saif M.
%Y Ousidhoum, Nedjma
%S Proceedings of the 15th Joint Conference on Lexical and Computational Semantics (*SEM 2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-413-2
%F pastorino-moosavi-2026-frame
%X News headlines and summaries shape how events are interpreted through selective emphasis and omission, a phenomenon commonly referred to as framing. Large language models are now routinely used to generate such content, yet existing evaluation frameworks largely overlook this dimension. We introduce Frame In, Frame Out (FIFO), the first large-scale benchmark for measuring framing presence in LLM-generated news summaries, grounded in the widely used XSum dataset. FIFO combines 15,499 jury-annotated examples with 320 expert-labeled instances (ąppa = 0.61) to validate and calibrate model-based annotations. Using FIFO, we analyze measured framing rates across 27 summarization models. We find that LLM-generated summaries often exhibit higher calibrated framing rates than human-written references, with substantial variation across topics and training regimes, including elevated rates in scientific and public health summaries. Our results establish framing as an underexplored and consequential dimension of summarization quality.
%U https://aclanthology.org/2026.starsem-conference.25/
%P 378-384
Markdown (Informal)
[Frame In, Frame Out: Measuring Framing Bias in LLM-Generated News Summaries](https://aclanthology.org/2026.starsem-conference.25/) (Pastorino & Moosavi, *SEM 2026)
ACL