% BibTeX record for the conference paper identified by the citation key below.
% NOTE(review): `address` appears to hold the conference venue rather than a publisher city; kept as given in the source record.
@inproceedings{hou-etal-2025-llm,
  title     = {{LLM}-Human Alignment in Evaluating Teacher Questioning Practices: Beyond Ratings to Explanation},
  author    = {Hou, Ruikun and
               F{\"u}tterer, Tim and
               B{\"u}hler, Babette and
               Schreyer, Patrick and
               Gerjets, Peter and
               Trautwein, Ulrich and
               Kasneci, Enkelejda},
  editor    = {Wilson, Joshua and
               Ormerod, Christopher and
               Beiting Parrish, Magdalen},
  booktitle = {Proceedings of the Artificial Intelligence in Measurement and Education Conference ({AIME-Con}): Full Papers},
  month     = oct,
  year      = {2025},
  address   = {Wyndham Grand Pittsburgh, Downtown, Pittsburgh, Pennsylvania, United States},
  publisher = {National Council on Measurement in Education (NCME)},
  url       = {https://aclanthology.org/2025.aimecon-main.26/},
  pages     = {239--249},
  isbn      = {979-8-218-84228-4},
  abstract  = {This study investigates the alignment between large language models (LLMs) and human raters in assessing teacher questioning practices, moving beyond rating agreement to the evidence selected to justify their decisions. Findings highlight LLMs' potential to support large-scale classroom observation through interpretable, evidence-based scoring, with possible implications for concrete teacher feedback.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record (ID hou-etal-2025-llm). -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="hou-etal-2025-llm">
    <!-- Title of the paper -->
    <titleInfo>
      <title>LLM-Human Alignment in Evaluating Teacher Questioning Practices: Beyond Ratings to Explanation</title>
    </titleInfo>
    <!-- Authors, one <name> element per person, each with the role term "author" -->
    <name type="personal">
      <namePart type="given">Ruikun</namePart>
      <namePart type="family">Hou</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Tim</namePart>
      <namePart type="family">Fütterer</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Babette</namePart>
      <namePart type="family">Bühler</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Patrick</namePart>
      <namePart type="family">Schreyer</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Peter</namePart>
      <namePart type="family">Gerjets</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ulrich</namePart>
      <namePart type="family">Trautwein</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Enkelejda</namePart>
      <namePart type="family">Kasneci</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <!-- Issue date of the paper (year and month) -->
    <originInfo>
      <dateIssued>2025-10</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher, place and ISBN -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Artificial Intelligence in Measurement and Education Conference (AIME-Con): Full Papers</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Joshua</namePart>
        <namePart type="family">Wilson</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Christopher</namePart>
        <namePart type="family">Ormerod</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Magdalen</namePart>
        <namePart type="family">Beiting Parrish</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>National Council on Measurement in Education (NCME)</publisher>
        <place>
          <placeTerm type="text">Wyndham Grand Pittsburgh, Downtown, Pittsburgh, Pennsylvania, United States</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-218-84228-4</identifier>
    </relatedItem>
    <abstract>This study investigates the alignment between large language models (LLMs) and human raters in assessing teacher questioning practices, moving beyond rating agreement to the evidence selected to justify their decisions. Findings highlight LLMs’ potential to support large-scale classroom observation through interpretable, evidence-based scoring, with possible implications for concrete teacher feedback.</abstract>
    <!-- Citation key; same value as the mods ID attribute above -->
    <identifier type="citekey">hou-etal-2025-llm</identifier>
    <location>
      <url>https://aclanthology.org/2025.aimecon-main.26/</url>
    </location>
    <!-- Date and page range of this paper -->
    <part>
      <date>2025-10</date>
      <extent unit="page">
        <start>239</start>
        <end>249</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T LLM-Human Alignment in Evaluating Teacher Questioning Practices: Beyond Ratings to Explanation
%A Hou, Ruikun
%A Fütterer, Tim
%A Bühler, Babette
%A Schreyer, Patrick
%A Gerjets, Peter
%A Trautwein, Ulrich
%A Kasneci, Enkelejda
%Y Wilson, Joshua
%Y Ormerod, Christopher
%Y Beiting Parrish, Magdalen
%S Proceedings of the Artificial Intelligence in Measurement and Education Conference (AIME-Con): Full Papers
%D 2025
%8 October
%I National Council on Measurement in Education (NCME)
%C Wyndham Grand Pittsburgh, Downtown, Pittsburgh, Pennsylvania, United States
%@ 979-8-218-84228-4
%F hou-etal-2025-llm
%X This study investigates the alignment between large language models (LLMs) and human raters in assessing teacher questioning practices, moving beyond rating agreement to the evidence selected to justify their decisions. Findings highlight LLMs’ potential to support large-scale classroom observation through interpretable, evidence-based scoring, with possible implications for concrete teacher feedback.
%U https://aclanthology.org/2025.aimecon-main.26/
%P 239-249
Markdown (Informal)
[LLM-Human Alignment in Evaluating Teacher Questioning Practices: Beyond Ratings to Explanation](https://aclanthology.org/2025.aimecon-main.26/) (Hou et al., AIME-Con 2025)
ACL
- Ruikun Hou, Tim Fütterer, Babette Bühler, Patrick Schreyer, Peter Gerjets, Ulrich Trautwein, and Enkelejda Kasneci. 2025. LLM-Human Alignment in Evaluating Teacher Questioning Practices: Beyond Ratings to Explanation. In Proceedings of the Artificial Intelligence in Measurement and Education Conference (AIME-Con): Full Papers, pages 239–249, Wyndham Grand Pittsburgh, Downtown, Pittsburgh, Pennsylvania, United States. National Council on Measurement in Education (NCME).