@inproceedings{perkoff-etal-2024-keep,
    title = "{``}Keep up the good work!{''}: Using Constraints in Zero Shot Prompting to Generate Supportive Teacher Responses",
    author = "Perkoff, E. Margaret and
      Ramirez, Angela Maria and
      von Bayern, Sean and
      Walker, Marilyn and
      Martin, James",
    editor = "Kawahara, Tatsuya and
      Demberg, Vera and
      Ultes, Stefan and
      Inoue, Koji and
      Mehri, Shikib and
      Howcroft, David and
      Komatani, Kazunori",
    booktitle = "Proceedings of the 25th Annual Meeting of the Special Interest Group on Discourse and Dialogue",
    month = sep,
    year = "2024",
    address = "Kyoto, Japan",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.sigdial-1.11",
    doi = "10.18653/v1/2024.sigdial-1.11",
    pages = "121--138",
    abstract = "Educational dialogue systems have been used to support students and teachers for decades. Such systems rely on explicit pedagogically motivated dialogue rules. With the ease of integrating large language models (LLMs) into dialogue systems, applications have been arising that directly use model responses without the use of human-written rules, raising concerns about their use in classroom settings. Here, we explore how to constrain LLM outputs to generate appropriate and supportive teacher-like responses. We present results comparing the effectiveness of different constraint variations in a zero-shot prompting setting on a large mathematics classroom corpus. Generated outputs are evaluated with human annotation for Fluency, Relevance, Helpfulness, and Adherence to the provided constraints. Including all constraints in the prompt led to the highest values for Fluency and Helpfulness, and the second highest value for Relevance. The annotation results also demonstrate that the prompts that result in the highest adherence to constraints do not necessarily indicate higher perceived scores for Fluency, Relevance, or Helpfulness. In a direct comparison, all of the non-baseline LLM responses were ranked higher than the actual teacher responses in the corpus over 50{\%} of the time.",
}