@inproceedings{khodja-etal-2025-factual,
title = "Factual Knowledge in Language Models: Robustness and Anomalies under Simple Temporal Context Variations",
author = "Khodja, Hichem Ammar and
Bechet, Frederic and
Brabant, Quentin and
Nasr, Alexis and
Lecorv{\'e}, Gw{\'e}nol{\'e}",
editor = "Jia, Robin and
Wallace, Eric and
Huang, Yangsibo and
Pimentel, Tiago and
Maini, Pratyush and
Dankers, Verna and
Wei, Johnny and
Lesci, Pietro",
booktitle = "Proceedings of the First Workshop on Large Language Model Memorization (L2M2)",
month = aug,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.l2m2-1.1/",
doi = "10.18653/v1/2025.l2m2-1.1",
pages = "1--22",
ISBN = "979-8-89176-278-7",
abstract = "This paper explores the robustness of language models (LMs) to variations in the temporal context within factual knowledge. It examines whether LMs can correctly associate a temporal context with a past fact valid over a defined period, by asking them to differentiate correct from incorrect contexts. The LMs' ability to distinguish is analyzed along two dimensions: the distance of the incorrect context from the validity period and the granularity of the context. To this end, a dataset called TimeStress is introduced, enabling the evaluation of 18 diverse LMs. Results reveal that the best LM achieves a perfect distinction for only 11{\%} of the studied facts, with errors, certainly rare, but critical that humans would not make. This work highlights the limitations of current LMs in temporal representation."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="khodja-etal-2025-factual">
<titleInfo>
<title>Factual Knowledge in Language Models: Robustness and Anomalies under Simple Temporal Context Variations</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hichem</namePart>
<namePart type="given">Ammar</namePart>
<namePart type="family">Khodja</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Frederic</namePart>
<namePart type="family">Bechet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Quentin</namePart>
<namePart type="family">Brabant</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexis</namePart>
<namePart type="family">Nasr</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gwénolé</namePart>
<namePart type="family">Lecorvé</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Large Language Model Memorization (L2M2)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Robin</namePart>
<namePart type="family">Jia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eric</namePart>
<namePart type="family">Wallace</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yangsibo</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tiago</namePart>
<namePart type="family">Pimentel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pratyush</namePart>
<namePart type="family">Maini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Verna</namePart>
<namePart type="family">Dankers</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Johnny</namePart>
<namePart type="family">Wei</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pietro</namePart>
<namePart type="family">Lesci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-278-7</identifier>
</relatedItem>
<abstract>This paper explores the robustness of language models (LMs) to variations in the temporal context within factual knowledge. It examines whether LMs can correctly associate a temporal context with a past fact valid over a defined period, by asking them to differentiate correct from incorrect contexts. The LMs’ ability to distinguish is analyzed along two dimensions: the distance of the incorrect context from the validity period and the granularity of the context. To this end, a dataset called TimeStress is introduced, enabling the evaluation of 18 diverse LMs. Results reveal that the best LM achieves a perfect distinction for only 11% of the studied facts, with errors, certainly rare, but critical that humans would not make. This work highlights the limitations of current LMs in temporal representation.</abstract>
<identifier type="citekey">khodja-etal-2025-factual</identifier>
<identifier type="doi">10.18653/v1/2025.l2m2-1.1</identifier>
<location>
<url>https://aclanthology.org/2025.l2m2-1.1/</url>
</location>
<part>
<date>2025-08</date>
<extent unit="page">
<start>1</start>
<end>22</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Factual Knowledge in Language Models: Robustness and Anomalies under Simple Temporal Context Variations
%A Khodja, Hichem Ammar
%A Bechet, Frederic
%A Brabant, Quentin
%A Nasr, Alexis
%A Lecorvé, Gwénolé
%Y Jia, Robin
%Y Wallace, Eric
%Y Huang, Yangsibo
%Y Pimentel, Tiago
%Y Maini, Pratyush
%Y Dankers, Verna
%Y Wei, Johnny
%Y Lesci, Pietro
%S Proceedings of the First Workshop on Large Language Model Memorization (L2M2)
%D 2025
%8 August
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-278-7
%F khodja-etal-2025-factual
%X This paper explores the robustness of language models (LMs) to variations in the temporal context within factual knowledge. It examines whether LMs can correctly associate a temporal context with a past fact valid over a defined period, by asking them to differentiate correct from incorrect contexts. The LMs’ ability to distinguish is analyzed along two dimensions: the distance of the incorrect context from the validity period and the granularity of the context. To this end, a dataset called TimeStress is introduced, enabling the evaluation of 18 diverse LMs. Results reveal that the best LM achieves a perfect distinction for only 11% of the studied facts, with errors, certainly rare, but critical that humans would not make. This work highlights the limitations of current LMs in temporal representation.
%R 10.18653/v1/2025.l2m2-1.1
%U https://aclanthology.org/2025.l2m2-1.1/
%U https://doi.org/10.18653/v1/2025.l2m2-1.1
%P 1-22
Markdown (Informal)
[Factual Knowledge in Language Models: Robustness and Anomalies under Simple Temporal Context Variations](https://aclanthology.org/2025.l2m2-1.1/) (Khodja et al., L2M2 2025)
ACL