@inproceedings{alves-2025-benchmarking,
    title = "Benchmarking Language Model Surprisal for Eye-Tracking Predictions in {Brazilian} {Portuguese}",
    author = "Alves, Diego",
    editor = "Acarturk, Cengiz and
      Nasir, Jamal and
      Can, Burcu and
      Coltekin, Cagr{\i}",
    booktitle = "Proceedings of the First International Workshop on Gaze Data and Natural Language Processing",
    month = sep,
    year = "2025",
    address = "Varna, Bulgaria",
    publisher = "INCOMA Ltd., Shoumen, BULGARIA",
    url = "https://aclanthology.org/2025.gaze4nlp-1.2/",
    pages = "7--17",
    abstract = "This study evaluates the effectiveness of surprisal estimates from six publicly available large language models (LLMs) in predicting reading times in Brazilian Portuguese (BP), using eye-tracking data from the RastrOS corpus. We analyze three key reading time measures: first fixation duration, gaze duration, and total fixation time. Our results demonstrate that surprisal significantly predicts all three measures, with a consistently linear effect observed across all models and the strongest effect for total fixation duration. We also find that larger model size does not necessarily provide better surprisal estimates. Additionally, entropy reduction derived from Cloze norms adds minimal predictive value beyond surprisal, and only for first fixation duration. These findings replicate known surprisal effects in BP and provide novel insights into how different models and linguistic predictors influence reading time predictions."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="alves-2025-benchmarking">
<titleInfo>
<title>Benchmarking Language Model Surprisal for Eye-Tracking Predictions in Brazilian Portuguese</title>
</titleInfo>
<name type="personal">
<namePart type="given">Diego</namePart>
<namePart type="family">Alves</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First International Workshop on Gaze Data and Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Cengiz</namePart>
<namePart type="family">Acarturk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jamal</namePart>
<namePart type="family">Nasir</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Burcu</namePart>
<namePart type="family">Can</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cagrı</namePart>
<namePart type="family">Coltekin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd., Shoumen, BULGARIA</publisher>
<place>
<placeTerm type="text">Varna, Bulgaria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This study evaluates the effectiveness of surprisal estimates from six publicly available large language models (LLMs) in predicting reading times in Brazilian Portuguese (BP), using eye-tracking data from the RastrOS corpus. We analyze three key reading time measures: first fixation duration, gaze duration, and total fixation time. Our results demonstrate that surprisal significantly predicts all three measures, with a consistently linear effect observed across all models and the strongest effect for total fixation duration. We also find that larger model size does not necessarily provide better surprisal estimates. Additionally, entropy reduction derived from Cloze norms adds minimal predictive value beyond surprisal, and only for first fixation duration. These findings replicate known surprisal effects in BP and provide novel insights into how different models and linguistic predictors influence reading time predictions.</abstract>
<identifier type="citekey">alves-2025-benchmarking</identifier>
<location>
<url>https://aclanthology.org/2025.gaze4nlp-1.2/</url>
</location>
<part>
<date>2025-09</date>
<extent unit="page">
<start>7</start>
<end>17</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Benchmarking Language Model Surprisal for Eye-Tracking Predictions in Brazilian Portuguese
%A Alves, Diego
%Y Acarturk, Cengiz
%Y Nasir, Jamal
%Y Can, Burcu
%Y Coltekin, Cagrı
%S Proceedings of the First International Workshop on Gaze Data and Natural Language Processing
%D 2025
%8 September
%I INCOMA Ltd., Shoumen, BULGARIA
%C Varna, Bulgaria
%F alves-2025-benchmarking
%X This study evaluates the effectiveness of surprisal estimates from six publicly available large language models (LLMs) in predicting reading times in Brazilian Portuguese (BP), using eye-tracking data from the RastrOS corpus. We analyze three key reading time measures: first fixation duration, gaze duration, and total fixation time. Our results demonstrate that surprisal significantly predicts all three measures, with a consistently linear effect observed across all models and the strongest effect for total fixation duration. We also find that larger model size does not necessarily provide better surprisal estimates. Additionally, entropy reduction derived from Cloze norms adds minimal predictive value beyond surprisal, and only for first fixation duration. These findings replicate known surprisal effects in BP and provide novel insights into how different models and linguistic predictors influence reading time predictions.
%U https://aclanthology.org/2025.gaze4nlp-1.2/
%P 7-17
Markdown (Informal)
[Benchmarking Language Model Surprisal for Eye-Tracking Predictions in Brazilian Portuguese](https://aclanthology.org/2025.gaze4nlp-1.2/) (Alves, Gaze4NLP 2025)
ACL