@inproceedings{stasaski-etal-2020-cima,
title = "{CIMA}: A Large Open Access Dialogue Dataset for Tutoring",
author = "Stasaski, Katherine and
Kao, Kimberly and
Hearst, Marti A.",
editor = "Burstein, Jill and
Kochmar, Ekaterina and
Leacock, Claudia and
Madnani, Nitin and
Pil{\'a}n, Ildik{\'o} and
Yannakoudakis, Helen and
Zesch, Torsten",
booktitle = "Proceedings of the Fifteenth Workshop on Innovative Use of NLP for Building Educational Applications",
month = jul,
year = "2020",
address = "Seattle, WA, USA → Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.bea-1.5",
doi = "10.18653/v1/2020.bea-1.5",
pages = "52--64",
abstract = "One-to-one tutoring is often an effective means to help students learn, and recent experiments with neural conversation systems are promising. However, large open datasets of tutoring conversations are lacking. To remedy this, we propose a novel asynchronous method for collecting tutoring dialogue via crowdworkers that is both amenable to the needs of deep learning algorithms and reflective of pedagogical concerns. In this approach, extended conversations are obtained between crowdworkers role-playing as both students and tutors. The CIMA collection, which we make publicly available, is novel in that students are exposed to overlapping grounded concepts between exercises and multiple relevant tutoring responses are collected for the same input. CIMA contains several compelling properties from an educational perspective: student role-players complete exercises in fewer turns during the course of the conversation and tutor players adopt strategies that conform with some educational conversational norms, such as providing hints versus asking questions in appropriate contexts. The dataset enables a model to be trained to generate the next tutoring utterance in a conversation, conditioned on a provided action strategy.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="stasaski-etal-2020-cima">
<titleInfo>
<title>CIMA: A Large Open Access Dialogue Dataset for Tutoring</title>
</titleInfo>
<name type="personal">
<namePart type="given">Katherine</namePart>
<namePart type="family">Stasaski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kimberly</namePart>
<namePart type="family">Kao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marti</namePart>
<namePart type="given">A</namePart>
<namePart type="family">Hearst</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifteenth Workshop on Innovative Use of NLP for Building Educational Applications</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jill</namePart>
<namePart type="family">Burstein</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Kochmar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Claudia</namePart>
<namePart type="family">Leacock</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nitin</namePart>
<namePart type="family">Madnani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ildikó</namePart>
<namePart type="family">Pilán</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Helen</namePart>
<namePart type="family">Yannakoudakis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Torsten</namePart>
<namePart type="family">Zesch</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Seattle, WA, USA → Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>One-to-one tutoring is often an effective means to help students learn, and recent experiments with neural conversation systems are promising. However, large open datasets of tutoring conversations are lacking. To remedy this, we propose a novel asynchronous method for collecting tutoring dialogue via crowdworkers that is both amenable to the needs of deep learning algorithms and reflective of pedagogical concerns. In this approach, extended conversations are obtained between crowdworkers role-playing as both students and tutors. The CIMA collection, which we make publicly available, is novel in that students are exposed to overlapping grounded concepts between exercises and multiple relevant tutoring responses are collected for the same input. CIMA contains several compelling properties from an educational perspective: student role-players complete exercises in fewer turns during the course of the conversation and tutor players adopt strategies that conform with some educational conversational norms, such as providing hints versus asking questions in appropriate contexts. The dataset enables a model to be trained to generate the next tutoring utterance in a conversation, conditioned on a provided action strategy.</abstract>
<identifier type="citekey">stasaski-etal-2020-cima</identifier>
<identifier type="doi">10.18653/v1/2020.bea-1.5</identifier>
<location>
<url>https://aclanthology.org/2020.bea-1.5</url>
</location>
<part>
<date>2020-07</date>
<extent unit="page">
<start>52</start>
<end>64</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T CIMA: A Large Open Access Dialogue Dataset for Tutoring
%A Stasaski, Katherine
%A Kao, Kimberly
%A Hearst, Marti A.
%Y Burstein, Jill
%Y Kochmar, Ekaterina
%Y Leacock, Claudia
%Y Madnani, Nitin
%Y Pilán, Ildikó
%Y Yannakoudakis, Helen
%Y Zesch, Torsten
%S Proceedings of the Fifteenth Workshop on Innovative Use of NLP for Building Educational Applications
%D 2020
%8 July
%I Association for Computational Linguistics
%C Seattle, WA, USA → Online
%F stasaski-etal-2020-cima
%X One-to-one tutoring is often an effective means to help students learn, and recent experiments with neural conversation systems are promising. However, large open datasets of tutoring conversations are lacking. To remedy this, we propose a novel asynchronous method for collecting tutoring dialogue via crowdworkers that is both amenable to the needs of deep learning algorithms and reflective of pedagogical concerns. In this approach, extended conversations are obtained between crowdworkers role-playing as both students and tutors. The CIMA collection, which we make publicly available, is novel in that students are exposed to overlapping grounded concepts between exercises and multiple relevant tutoring responses are collected for the same input. CIMA contains several compelling properties from an educational perspective: student role-players complete exercises in fewer turns during the course of the conversation and tutor players adopt strategies that conform with some educational conversational norms, such as providing hints versus asking questions in appropriate contexts. The dataset enables a model to be trained to generate the next tutoring utterance in a conversation, conditioned on a provided action strategy.
%R 10.18653/v1/2020.bea-1.5
%U https://aclanthology.org/2020.bea-1.5
%U https://doi.org/10.18653/v1/2020.bea-1.5
%P 52-64
Markdown (Informal)
[CIMA: A Large Open Access Dialogue Dataset for Tutoring](https://aclanthology.org/2020.bea-1.5) (Stasaski et al., BEA 2020)
ACL
- Katherine Stasaski, Kimberly Kao, and Marti A. Hearst. 2020. CIMA: A Large Open Access Dialogue Dataset for Tutoring. In Proceedings of the Fifteenth Workshop on Innovative Use of NLP for Building Educational Applications, pages 52–64, Seattle, WA, USA → Online. Association for Computational Linguistics.