@inproceedings{rahman-etal-2025-smollab,
title = "{S}mol{L}ab{\_}{SEU} at {BEA} 2025 Shared Task: A Transformer-Based Framework for Multi-Track Pedagogical Evaluation of {AI}-Powered Tutors",
author = "Rahman, Md. Abdur and
Amin, Md Al and
Aftahee, Sabik and
Junayed, Muhammad and
Rahman, Md Ashiqur",
editor = {Kochmar, Ekaterina and
Alhafni, Bashar and
Bexte, Marie and
Burstein, Jill and
Horbach, Andrea and
Laarmann-Quante, Ronja and
Tack, Ana{\"i}s and
Yaneva, Victoria and
Yuan, Zheng},
booktitle = "Proceedings of the 20th Workshop on Innovative Use of NLP for Building Educational Applications (BEA 2025)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.bea-1.88/",
doi = "10.18653/v1/2025.bea-1.88",
pages = "1127--1134",
ISBN = "979-8-89176-270-1",
abstract = "The rapid adoption of AI in educational technology is changing learning settings, making the thorough evaluation of AI tutor pedagogical performance is quite important for promoting student success. This paper describes our solution for the BEA 2025 Shared Task on Pedagogical Ability Assessment of AI-powered tutors, which assesses tutor replies over several pedagogical dimensions. We developed transformer-based approaches for five diverse tracks: mistake identification, mistake location, providing guidance, actionability, and tutor identity prediction using the MRBench dataset of mathematical dialogues. We evaluated several pre-trained models including DeBERTa-V3, RoBERTa-Large, SciBERT, and EduBERT. Our approach addressed class imbalance problems by incorporating strategic fine-tuning with weighted loss functions. The findings show that, for all tracks, DeBERTa architectures have higher performances than the others, and our models have obtained in the competitive positions, including 9th of Tutor Identity (Exact F1 of 0.8621), 16th of Actionability (Exact F1 of 0.6284), 19th of Providing Guidance (Exact F1 of 0.4933), 20th of Mistake Identification (Exact F1 of 0.6617) and 22nd of Mistake Location (Exact F1 of 0.4935). The difference in performance over tracks highlights the difficulty of automatic pedagogical evaluation, especially for tasks whose solutions require a deep understanding of educational contexts. This work contributes to ongoing efforts to develop robust automated tools for assessing."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="rahman-etal-2025-smollab">
<titleInfo>
<title>SmolLab_SEU at BEA 2025 Shared Task: A Transformer-Based Framework for Multi-Track Pedagogical Evaluation of AI-Powered Tutors</title>
</titleInfo>
<name type="personal">
<namePart type="given">Md.</namePart>
<namePart type="given">Abdur</namePart>
<namePart type="family">Rahman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Md</namePart>
<namePart type="given">Al</namePart>
<namePart type="family">Amin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sabik</namePart>
<namePart type="family">Aftahee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Muhammad</namePart>
<namePart type="family">Junayed</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Md</namePart>
<namePart type="given">Ashiqur</namePart>
<namePart type="family">Rahman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 20th Workshop on Innovative Use of NLP for Building Educational Applications (BEA 2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Kochmar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bashar</namePart>
<namePart type="family">Alhafni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marie</namePart>
<namePart type="family">Bexte</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jill</namePart>
<namePart type="family">Burstein</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andrea</namePart>
<namePart type="family">Horbach</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ronja</namePart>
<namePart type="family">Laarmann-Quante</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anaïs</namePart>
<namePart type="family">Tack</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Victoria</namePart>
<namePart type="family">Yaneva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zheng</namePart>
<namePart type="family">Yuan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-270-1</identifier>
</relatedItem>
<abstract>The rapid adoption of AI in educational technology is changing learning settings, making the thorough evaluation of AI tutor pedagogical performance is quite important for promoting student success. This paper describes our solution for the BEA 2025 Shared Task on Pedagogical Ability Assessment of AI-powered tutors, which assesses tutor replies over several pedagogical dimensions. We developed transformer-based approaches for five diverse tracks: mistake identification, mistake location, providing guidance, actionability, and tutor identity prediction using the MRBench dataset of mathematical dialogues. We evaluated several pre-trained models including DeBERTa-V3, RoBERTa-Large, SciBERT, and EduBERT. Our approach addressed class imbalance problems by incorporating strategic fine-tuning with weighted loss functions. The findings show that, for all tracks, DeBERTa architectures have higher performances than the others, and our models have obtained in the competitive positions, including 9th of Tutor Identity (Exact F1 of 0.8621), 16th of Actionability (Exact F1 of 0.6284), 19th of Providing Guidance (Exact F1 of 0.4933), 20th of Mistake Identification (Exact F1 of 0.6617) and 22nd of Mistake Location (Exact F1 of 0.4935). The difference in performance over tracks highlights the difficulty of automatic pedagogical evaluation, especially for tasks whose solutions require a deep understanding of educational contexts. This work contributes to ongoing efforts to develop robust automated tools for assessing.</abstract>
<identifier type="citekey">rahman-etal-2025-smollab</identifier>
<identifier type="doi">10.18653/v1/2025.bea-1.88</identifier>
<location>
<url>https://aclanthology.org/2025.bea-1.88/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>1127</start>
<end>1134</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T SmolLab_SEU at BEA 2025 Shared Task: A Transformer-Based Framework for Multi-Track Pedagogical Evaluation of AI-Powered Tutors
%A Rahman, Md. Abdur
%A Amin, Md Al
%A Aftahee, Sabik
%A Junayed, Muhammad
%A Rahman, Md Ashiqur
%Y Kochmar, Ekaterina
%Y Alhafni, Bashar
%Y Bexte, Marie
%Y Burstein, Jill
%Y Horbach, Andrea
%Y Laarmann-Quante, Ronja
%Y Tack, Anaïs
%Y Yaneva, Victoria
%Y Yuan, Zheng
%S Proceedings of the 20th Workshop on Innovative Use of NLP for Building Educational Applications (BEA 2025)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-270-1
%F rahman-etal-2025-smollab
%X The rapid adoption of AI in educational technology is changing learning settings, making the thorough evaluation of AI tutor pedagogical performance is quite important for promoting student success. This paper describes our solution for the BEA 2025 Shared Task on Pedagogical Ability Assessment of AI-powered tutors, which assesses tutor replies over several pedagogical dimensions. We developed transformer-based approaches for five diverse tracks: mistake identification, mistake location, providing guidance, actionability, and tutor identity prediction using the MRBench dataset of mathematical dialogues. We evaluated several pre-trained models including DeBERTa-V3, RoBERTa-Large, SciBERT, and EduBERT. Our approach addressed class imbalance problems by incorporating strategic fine-tuning with weighted loss functions. The findings show that, for all tracks, DeBERTa architectures have higher performances than the others, and our models have obtained in the competitive positions, including 9th of Tutor Identity (Exact F1 of 0.8621), 16th of Actionability (Exact F1 of 0.6284), 19th of Providing Guidance (Exact F1 of 0.4933), 20th of Mistake Identification (Exact F1 of 0.6617) and 22nd of Mistake Location (Exact F1 of 0.4935). The difference in performance over tracks highlights the difficulty of automatic pedagogical evaluation, especially for tasks whose solutions require a deep understanding of educational contexts. This work contributes to ongoing efforts to develop robust automated tools for assessing.
%R 10.18653/v1/2025.bea-1.88
%U https://aclanthology.org/2025.bea-1.88/
%U https://doi.org/10.18653/v1/2025.bea-1.88
%P 1127-1134
Markdown (Informal)
[SmolLab_SEU at BEA 2025 Shared Task: A Transformer-Based Framework for Multi-Track Pedagogical Evaluation of AI-Powered Tutors](https://aclanthology.org/2025.bea-1.88/) (Rahman et al., BEA 2025)
ACL