% Conference paper in the Findings of EMNLP 2024 proceedings.
% The abstract's "LHP" acronym (Learning from Human Preferences) is written out
% explicitly wherever the export had dropped it.
@inproceedings{sonkar-etal-2024-pedagogical,
  title = "Pedagogical Alignment of Large Language Models",
  author = "Sonkar, Shashank and
    Ni, Kangqi and
    Chaudhary, Sapana and
    Baraniuk, Richard",
  editor = "Al-Onaizan, Yaser and
    Bansal, Mohit and
    Chen, Yun-Nung",
  booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2024",
  month = nov,
  year = "2024",
  address = "Miami, Florida, USA",
  publisher = "Association for Computational Linguistics",
  url = "https://aclanthology.org/2024.findings-emnlp.797",
  pages = "13641--13650",
  abstract = "Large Language Models (LLMs), when used in educational settings without pedagogical fine-tuning, often provide immediate answers rather than guiding students through the problem-solving process. This approach falls short of pedagogically best practices and limits their effectiveness as educational tools. We term the objective of training LLMs to emulate effective teaching strategies as {`}pedagogical alignment.{'} In this paper, we investigate Learning from Human Preferences (LHP) algorithms to achieve this alignment objective. A key challenge in this process is the scarcity of high-quality preference datasets to guide the alignment. To address this, we propose a novel approach for constructing a large-scale dataset using synthetic data generation techniques, eliminating the need for time-consuming and costly manual annotation. Leveraging this dataset, our experiments with Llama and Mistral models demonstrate that LHP methods outperform standard supervised fine-tuning (SFT), improving pedagogical alignment accuracy by 13.1{\%} and 8.7{\%} respectively. Existing evaluation methods also lack quantitative metrics to adequately measure the pedagogical alignment of LLMs. To address this gap, we propose novel perplexity-based metrics that quantify LLMs{'} tendency to provide scaffolded guidance versus direct answers, offering a robust measure of pedagogical alignment. Our analysis provides compelling evidence for the superiority of LHP methods over SFT in optimizing LLMs{'} behavior, underscoring the potential of LHP methods in better aligning LLMs with educational objectives and fostering effective learning experiences. Code and models are available here.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sonkar-etal-2024-pedagogical">
<titleInfo>
<title>Pedagogical Alignment of Large Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Shashank</namePart>
<namePart type="family">Sonkar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kangqi</namePart>
<namePart type="family">Ni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sapana</namePart>
<namePart type="family">Chaudhary</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Richard</namePart>
<namePart type="family">Baraniuk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaser</namePart>
<namePart type="family">Al-Onaizan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Large Language Models (LLMs), when used in educational settings without pedagogical fine-tuning, often provide immediate answers rather than guiding students through the problem-solving process. This approach falls short of pedagogically best practices and limits their effectiveness as educational tools. We term the objective of training LLMs to emulate effective teaching strategies as ‘pedagogical alignment.’ In this paper, we investigate Learning from Human Preferences (LHP) algorithms to achieve this alignment objective. A key challenge in this process is the scarcity of high-quality preference datasets to guide the alignment. To address this, we propose a novel approach for constructing a large-scale dataset using synthetic data generation techniques, eliminating the need for time-consuming and costly manual annotation. Leveraging this dataset, our experiments with Llama and Mistral models demonstrate that LHP methods outperform standard supervised fine-tuning (SFT), improving pedagogical alignment accuracy by 13.1% and 8.7% respectively. Existing evaluation methods also lack quantitative metrics to adequately measure the pedagogical alignment of LLMs. To address this gap, we propose novel perplexity-based metrics that quantify LLMs’ tendency to provide scaffolded guidance versus direct answers, offering a robust measure of pedagogical alignment. Our analysis provides compelling evidence for the superiority of LHP methods over SFT in optimizing LLMs’ behavior, underscoring the potential of LHP methods in better aligning LLMs with educational objectives and fostering effective learning experiences. Code and models are available here.</abstract>
<identifier type="citekey">sonkar-etal-2024-pedagogical</identifier>
<location>
<url>https://aclanthology.org/2024.findings-emnlp.797</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>13641</start>
<end>13650</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Pedagogical Alignment of Large Language Models
%A Sonkar, Shashank
%A Ni, Kangqi
%A Chaudhary, Sapana
%A Baraniuk, Richard
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Findings of the Association for Computational Linguistics: EMNLP 2024
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F sonkar-etal-2024-pedagogical
%X Large Language Models (LLMs), when used in educational settings without pedagogical fine-tuning, often provide immediate answers rather than guiding students through the problem-solving process. This approach falls short of pedagogically best practices and limits their effectiveness as educational tools. We term the objective of training LLMs to emulate effective teaching strategies as ‘pedagogical alignment.’ In this paper, we investigate Learning from Human Preferences (LHP) algorithms to achieve this alignment objective. A key challenge in this process is the scarcity of high-quality preference datasets to guide the alignment. To address this, we propose a novel approach for constructing a large-scale dataset using synthetic data generation techniques, eliminating the need for time-consuming and costly manual annotation. Leveraging this dataset, our experiments with Llama and Mistral models demonstrate that LHP methods outperform standard supervised fine-tuning (SFT), improving pedagogical alignment accuracy by 13.1% and 8.7% respectively. Existing evaluation methods also lack quantitative metrics to adequately measure the pedagogical alignment of LLMs. To address this gap, we propose novel perplexity-based metrics that quantify LLMs’ tendency to provide scaffolded guidance versus direct answers, offering a robust measure of pedagogical alignment. Our analysis provides compelling evidence for the superiority of LHP methods over SFT in optimizing LLMs’ behavior, underscoring the potential of LHP methods in better aligning LLMs with educational objectives and fostering effective learning experiences. Code and models are available here.
%U https://aclanthology.org/2024.findings-emnlp.797
%P 13641-13650
Markdown (Informal)
[Pedagogical Alignment of Large Language Models](https://aclanthology.org/2024.findings-emnlp.797) (Sonkar et al., Findings 2024)
ACL
- Shashank Sonkar, Kangqi Ni, Sapana Chaudhary, and Richard Baraniuk. 2024. Pedagogical Alignment of Large Language Models. In Findings of the Association for Computational Linguistics: EMNLP 2024, pages 13641–13650, Miami, Florida, USA. Association for Computational Linguistics.