@inproceedings{hayat-etal-2025-three,
title = "A Three-Tier {LLM} Framework for Forecasting Student Engagement from Qualitative Longitudinal Data",
author = "Hayat, Ahatsham and
Martinez, Helen and
Khan, Bilal and
Hasan, Mohammad Rashedul",
editor = "Boleda, Gemma and
Roth, Michael",
booktitle = "Proceedings of the 29th Conference on Computational Natural Language Learning",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.conll-1.22/",
doi = "10.18653/v1/2025.conll-1.22",
pages = "334--347",
ISBN = "979-8-89176-271-8",
abstract = "Forecasting nuanced shifts in student engagement from longitudinal experiential (LE) data{---}multi-modal, qualitative trajectories of academic experiences over time{---}remains challenging due to high dimensionality and missingness. We propose a natural language processing (NLP)-driven framework using large language models (LLMs) to forecast binary engagement levels across four dimensions: Lecture Engagement Disposition, Academic Self-Efficacy, Performance Self-Evaluation, and Academic Identity and Value Perception. Evaluated on 960 trajectories from 96 first-year STEM students, our three-tier approach{---}LLM-informed imputation to generate textual descriptors for missing-not-at-random (MNAR) patterns, zero-shot feature selection via ensemble voting, and fine-tuned LLMs{---}processes textual non-cognitive responses. LLMs substantially outperform numeric baselines (e.g., Random Forest, LSTM) by capturing contextual nuances in student responses. Encoder-only LLMs surpass decoder-only variants, highlighting architectural strengths for sparse, qualitative LE data. Our framework advances NLP solutions for modeling student engagement from complex LE data, excelling where traditional methods struggle."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hayat-etal-2025-three">
<titleInfo>
<title>A Three-Tier LLM Framework for Forecasting Student Engagement from Qualitative Longitudinal Data</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ahatsham</namePart>
<namePart type="family">Hayat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Helen</namePart>
<namePart type="family">Martinez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bilal</namePart>
<namePart type="family">Khan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Rashedul</namePart>
<namePart type="family">Hasan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 29th Conference on Computational Natural Language Learning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Gemma</namePart>
<namePart type="family">Boleda</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michael</namePart>
<namePart type="family">Roth</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-271-8</identifier>
</relatedItem>
<abstract>Forecasting nuanced shifts in student engagement from longitudinal experiential (LE) data—multi-modal, qualitative trajectories of academic experiences over time—remains challenging due to high dimensionality and missingness. We propose a natural language processing (NLP)-driven framework using large language models (LLMs) to forecast binary engagement levels across four dimensions: Lecture Engagement Disposition, Academic Self-Efficacy, Performance Self-Evaluation, and Academic Identity and Value Perception. Evaluated on 960 trajectories from 96 first-year STEM students, our three-tier approach—LLM-informed imputation to generate textual descriptors for missing-not-at-random (MNAR) patterns, zero-shot feature selection via ensemble voting, and fine-tuned LLMs—processes textual non-cognitive responses. LLMs substantially outperform numeric baselines (e.g., Random Forest, LSTM) by capturing contextual nuances in student responses. Encoder-only LLMs surpass decoder-only variants, highlighting architectural strengths for sparse, qualitative LE data. Our framework advances NLP solutions for modeling student engagement from complex LE data, excelling where traditional methods struggle.</abstract>
<identifier type="citekey">hayat-etal-2025-three</identifier>
<identifier type="doi">10.18653/v1/2025.conll-1.22</identifier>
<location>
<url>https://aclanthology.org/2025.conll-1.22/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>334</start>
<end>347</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Three-Tier LLM Framework for Forecasting Student Engagement from Qualitative Longitudinal Data
%A Hayat, Ahatsham
%A Martinez, Helen
%A Khan, Bilal
%A Hasan, Mohammad Rashedul
%Y Boleda, Gemma
%Y Roth, Michael
%S Proceedings of the 29th Conference on Computational Natural Language Learning
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-271-8
%F hayat-etal-2025-three
%X Forecasting nuanced shifts in student engagement from longitudinal experiential (LE) data—multi-modal, qualitative trajectories of academic experiences over time—remains challenging due to high dimensionality and missingness. We propose a natural language processing (NLP)-driven framework using large language models (LLMs) to forecast binary engagement levels across four dimensions: Lecture Engagement Disposition, Academic Self-Efficacy, Performance Self-Evaluation, and Academic Identity and Value Perception. Evaluated on 960 trajectories from 96 first-year STEM students, our three-tier approach—LLM-informed imputation to generate textual descriptors for missing-not-at-random (MNAR) patterns, zero-shot feature selection via ensemble voting, and fine-tuned LLMs—processes textual non-cognitive responses. LLMs substantially outperform numeric baselines (e.g., Random Forest, LSTM) by capturing contextual nuances in student responses. Encoder-only LLMs surpass decoder-only variants, highlighting architectural strengths for sparse, qualitative LE data. Our framework advances NLP solutions for modeling student engagement from complex LE data, excelling where traditional methods struggle.
%R 10.18653/v1/2025.conll-1.22
%U https://aclanthology.org/2025.conll-1.22/
%U https://doi.org/10.18653/v1/2025.conll-1.22
%P 334-347
Markdown (Informal)
[A Three-Tier LLM Framework for Forecasting Student Engagement from Qualitative Longitudinal Data](https://aclanthology.org/2025.conll-1.22/) (Hayat et al., CoNLL 2025)
ACL