% BibTeX record for the INLG 2025 conference paper (ACL Anthology 2025.inlg-main.3).
% Personal names are stored as UTF-8 throughout; process with a UTF-8-aware tool.
@inproceedings{domingo-etal-2025-human,
  title     = {Human ratings of {LLM} response generation in pair-programming dialogue},
  author    = {Domingo, Cecilia and
               Piwek, Paul and
               Stoyanchev, Svetlana and
               Wermelinger, Michel and
               Adhikari, Kaustubh and
               Doddipatla, Rama Sanand},
  editor    = {Flek, Lucie and
               Narayan, Shashi and
               Phương, Lê Hồng and
               Pei, Jiahuan},
  booktitle = {Proceedings of the 18th International Natural Language Generation Conference},
  month     = oct,
  year      = {2025},
  address   = {Hanoi, Vietnam},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.inlg-main.3/},
  pages     = {41--59},
  abstract  = {We take first steps in exploring whether Large Language Models (LLMs) can be adapted to dialogic learning practices, specifically pair programming {---} LLMs have primarily been implemented as programming assistants, not fully exploiting their dialogic potential. We used new dialogue data from real pair-programming interactions between students, prompting state-of-the-art LLMs to assume the role of a student, when generating a response that continues the real dialogue. We asked human annotators to rate human and AI responses on the criteria through which we operationalise the LLMs' suitability for educational dialogue: Coherence, Collaborativeness, and whether they appeared human. Results show model differences, with Llama-generated responses being rated similarly to human answers on all three criteria. Thus, for at least one of the models we investigated, the LLM utterance-level response generation appears to be suitable for pair-programming dialogue.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS record for the conference paper with citekey domingo-etal-2025-human. -->
  <mods ID="domingo-etal-2025-human">
    <titleInfo>
      <title>Human ratings of LLM response generation in pair-programming dialogue</title>
    </titleInfo>
    <!-- Authors of the paper. -->
    <name type="personal">
      <namePart type="given">Cecilia</namePart>
      <namePart type="family">Domingo</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Paul</namePart>
      <namePart type="family">Piwek</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Svetlana</namePart>
      <namePart type="family">Stoyanchev</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Michel</namePart>
      <namePart type="family">Wermelinger</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Kaustubh</namePart>
      <namePart type="family">Adhikari</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Rama</namePart>
      <namePart type="given">Sanand</namePart>
      <namePart type="family">Doddipatla</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-10</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host publication: the proceedings volume with its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 18th International Natural Language Generation Conference</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Lucie</namePart>
        <namePart type="family">Flek</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Shashi</namePart>
        <namePart type="family">Narayan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Lê</namePart>
        <namePart type="given">Hồng</namePart>
        <namePart type="family">Phương</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jiahuan</namePart>
        <namePart type="family">Pei</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Hanoi, Vietnam</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>We take first steps in exploring whether Large Language Models (LLMs) can be adapted to dialogic learning practices, specifically pair programming — LLMs have primarily been implemented as programming assistants, not fully exploiting their dialogic potential. We used new dialogue data from real pair-programming interactions between students, prompting state-of-the-art LLMs to assume the role of a student, when generating a response that continues the real dialogue. We asked human annotators to rate human and AI responses on the criteria through which we operationalise the LLMs’ suitability for educational dialogue: Coherence, Collaborativeness, and whether they appeared human. Results show model differences, with Llama-generated responses being rated similarly to human answers on all three criteria. Thus, for at least one of the models we investigated, the LLM utterance-level response generation appears to be suitable for pair-programming dialogue.</abstract>
    <identifier type="citekey">domingo-etal-2025-human</identifier>
    <location>
      <url>https://aclanthology.org/2025.inlg-main.3/</url>
    </location>
    <!-- Issue date and page extent of the paper. -->
    <part>
      <date>2025-10</date>
      <extent unit="page">
        <start>41</start>
        <end>59</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Human ratings of LLM response generation in pair-programming dialogue
%A Domingo, Cecilia
%A Piwek, Paul
%A Stoyanchev, Svetlana
%A Wermelinger, Michel
%A Adhikari, Kaustubh
%A Doddipatla, Rama Sanand
%Y Flek, Lucie
%Y Narayan, Shashi
%Y Phương, Lê Hồng
%Y Pei, Jiahuan
%S Proceedings of the 18th International Natural Language Generation Conference
%D 2025
%8 October
%I Association for Computational Linguistics
%C Hanoi, Vietnam
%F domingo-etal-2025-human
%X We take first steps in exploring whether Large Language Models (LLMs) can be adapted to dialogic learning practices, specifically pair programming — LLMs have primarily been implemented as programming assistants, not fully exploiting their dialogic potential. We used new dialogue data from real pair-programming interactions between students, prompting state-of-the-art LLMs to assume the role of a student, when generating a response that continues the real dialogue. We asked human annotators to rate human and AI responses on the criteria through which we operationalise the LLMs’ suitability for educational dialogue: Coherence, Collaborativeness, and whether they appeared human. Results show model differences, with Llama-generated responses being rated similarly to human answers on all three criteria. Thus, for at least one of the models we investigated, the LLM utterance-level response generation appears to be suitable for pair-programming dialogue.
%U https://aclanthology.org/2025.inlg-main.3/
%P 41-59
Markdown (Informal)
[Human ratings of LLM response generation in pair-programming dialogue](https://aclanthology.org/2025.inlg-main.3/) (Domingo et al., INLG 2025)
ACL
- Cecilia Domingo, Paul Piwek, Svetlana Stoyanchev, Michel Wermelinger, Kaustubh Adhikari, and Rama Sanand Doddipatla. 2025. Human ratings of LLM response generation in pair-programming dialogue. In Proceedings of the 18th International Natural Language Generation Conference, pages 41–59, Hanoi, Vietnam. Association for Computational Linguistics.