@inproceedings{jain-rengarajan-2025-emergent,
title = "Emergent Wisdom at {BEA} 2025 Shared Task: From Lexical Understanding to Reflective Reasoning for Pedagogical Ability Assessment",
author = "Jain, Raunak and
Rengarajan, Srinivasan",
editor = {Kochmar, Ekaterina and
Alhafni, Bashar and
Bexte, Marie and
Burstein, Jill and
Horbach, Andrea and
Laarmann-Quante, Ronja and
Tack, Ana{\"i}s and
Yaneva, Victoria and
Yuan, Zheng},
booktitle = "Proceedings of the 20th Workshop on Innovative Use of NLP for Building Educational Applications (BEA 2025)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.bea-1.86/",
doi = "10.18653/v1/2025.bea-1.86",
pages = "1108--1120",
ISBN = "979-8-89176-270-1",
abstract = "For the BEA 2025 shared task on pedagogi- cal ability assessment, we introduce LUCERA (Lexical Understanding for Cue Density{--}Based Escalation and Reflective Assessment), a rubric-grounded evaluation framework for sys- tematically analyzing tutor responses across configurable pedagogical dimensions. The ar- chitecture comprises three core components: (1) a rubric-guided large language model (LLM) agent that performs lexical and dialogic cue extraction in a self-reflective, goal-driven manner; (2) a cue-complexity assessment and routing mechanism that sends high-confidence cases to a fine-tuned T5 classifier and esca- lates low-confidence or ambiguous cases to a reasoning-intensive LLM judge; and (3) an LLM-as-a-judge module that performs struc- tured, multi-step reasoning: (i) generating a domain-grounded reference solution, (ii) iden- tifying conceptual, procedural and cognitive gaps in student output, (iii) inferring the tutor{'}s instructional intent, and (iv) applying the rubric to produce justification-backed classifications. Results show that this unique combination of LLM powered feature engineering, strategic routing and rubrics for grading, enables com- petitive performance without sacrificing inter- pretability and cost effectiveness."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="jain-rengarajan-2025-emergent">
<titleInfo>
<title>Emergent Wisdom at BEA 2025 Shared Task: From Lexical Understanding to Reflective Reasoning for Pedagogical Ability Assessment</title>
</titleInfo>
<name type="personal">
<namePart type="given">Raunak</namePart>
<namePart type="family">Jain</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Srinivasan</namePart>
<namePart type="family">Rengarajan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 20th Workshop on Innovative Use of NLP for Building Educational Applications (BEA 2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Kochmar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bashar</namePart>
<namePart type="family">Alhafni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marie</namePart>
<namePart type="family">Bexte</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jill</namePart>
<namePart type="family">Burstein</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andrea</namePart>
<namePart type="family">Horbach</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ronja</namePart>
<namePart type="family">Laarmann-Quante</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anaïs</namePart>
<namePart type="family">Tack</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Victoria</namePart>
<namePart type="family">Yaneva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zheng</namePart>
<namePart type="family">Yuan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-270-1</identifier>
</relatedItem>
    <abstract>For the BEA 2025 shared task on pedagogical ability assessment, we introduce LUCERA (Lexical Understanding for Cue Density–Based Escalation and Reflective Assessment), a rubric-grounded evaluation framework for systematically analyzing tutor responses across configurable pedagogical dimensions. The architecture comprises three core components: (1) a rubric-guided large language model (LLM) agent that performs lexical and dialogic cue extraction in a self-reflective, goal-driven manner; (2) a cue-complexity assessment and routing mechanism that sends high-confidence cases to a fine-tuned T5 classifier and escalates low-confidence or ambiguous cases to a reasoning-intensive LLM judge; and (3) an LLM-as-a-judge module that performs structured, multi-step reasoning: (i) generating a domain-grounded reference solution, (ii) identifying conceptual, procedural and cognitive gaps in student output, (iii) inferring the tutor’s instructional intent, and (iv) applying the rubric to produce justification-backed classifications. Results show that this unique combination of LLM powered feature engineering, strategic routing and rubrics for grading, enables competitive performance without sacrificing interpretability and cost effectiveness.</abstract>
<identifier type="citekey">jain-rengarajan-2025-emergent</identifier>
<identifier type="doi">10.18653/v1/2025.bea-1.86</identifier>
<location>
<url>https://aclanthology.org/2025.bea-1.86/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>1108</start>
<end>1120</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Emergent Wisdom at BEA 2025 Shared Task: From Lexical Understanding to Reflective Reasoning for Pedagogical Ability Assessment
%A Jain, Raunak
%A Rengarajan, Srinivasan
%Y Kochmar, Ekaterina
%Y Alhafni, Bashar
%Y Bexte, Marie
%Y Burstein, Jill
%Y Horbach, Andrea
%Y Laarmann-Quante, Ronja
%Y Tack, Anaïs
%Y Yaneva, Victoria
%Y Yuan, Zheng
%S Proceedings of the 20th Workshop on Innovative Use of NLP for Building Educational Applications (BEA 2025)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-270-1
%F jain-rengarajan-2025-emergent
%X For the BEA 2025 shared task on pedagogical ability assessment, we introduce LUCERA (Lexical Understanding for Cue Density–Based Escalation and Reflective Assessment), a rubric-grounded evaluation framework for systematically analyzing tutor responses across configurable pedagogical dimensions. The architecture comprises three core components: (1) a rubric-guided large language model (LLM) agent that performs lexical and dialogic cue extraction in a self-reflective, goal-driven manner; (2) a cue-complexity assessment and routing mechanism that sends high-confidence cases to a fine-tuned T5 classifier and escalates low-confidence or ambiguous cases to a reasoning-intensive LLM judge; and (3) an LLM-as-a-judge module that performs structured, multi-step reasoning: (i) generating a domain-grounded reference solution, (ii) identifying conceptual, procedural and cognitive gaps in student output, (iii) inferring the tutor’s instructional intent, and (iv) applying the rubric to produce justification-backed classifications. Results show that this unique combination of LLM powered feature engineering, strategic routing and rubrics for grading, enables competitive performance without sacrificing interpretability and cost effectiveness.
%R 10.18653/v1/2025.bea-1.86
%U https://aclanthology.org/2025.bea-1.86/
%U https://doi.org/10.18653/v1/2025.bea-1.86
%P 1108-1120
Markdown (Informal)
[Emergent Wisdom at BEA 2025 Shared Task: From Lexical Understanding to Reflective Reasoning for Pedagogical Ability Assessment](https://aclanthology.org/2025.bea-1.86/) (Jain & Rengarajan, BEA 2025)
ACL
Raunak Jain and Srinivasan Rengarajan. 2025. Emergent Wisdom at BEA 2025 Shared Task: From Lexical Understanding to Reflective Reasoning for Pedagogical Ability Assessment. In Proceedings of the 20th Workshop on Innovative Use of NLP for Building Educational Applications (BEA 2025), pages 1108–1120, Vienna, Austria. Association for Computational Linguistics.