@inproceedings{lee-etal-2022-cs1qa,
title = "{CS}1{QA}: A Dataset for Assisting Code-based Question Answering in an Introductory Programming Course",
author = "Lee, Changyoon and
Seonwoo, Yeon and
Oh, Alice",
booktitle = "Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies",
month = jul,
year = "2022",
address = "Seattle, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.naacl-main.148",
doi = "10.18653/v1/2022.naacl-main.148",
pages = "2026--2040",
abstract = "We introduce CS1QA, a dataset for code-based question answering in the programming education domain. CS1QA consists of 9,237 question-answer pairs gathered from chat logs in an introductory programming class using Python, and 17,698 unannotated chat data with code. Each question is accompanied with the student{'}s code, and the portion of the code relevant to answering the question. We carefully design the annotation process to construct CS1QA, and analyze the collected dataset in detail. The tasks for CS1QA are to predict the question type, the relevant code snippet given the question and the code and retrieving an answer from the annotated corpus.Results for the experiments on several baseline models are reported and thoroughly analyzed. The tasks for CS1QA challenge models to understand both the code and natural language. This unique dataset can be used as a benchmark for source code comprehension and question answering in the educational setting.",
}
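A minimal LaTeX sketch of how the BibTeX entry above can be used; the bibliography filename anthology.bib is an assumption made for this example, while the citation key lee-etal-2022-cs1qa comes from the entry itself.

\documentclass{article}
\begin{document}
CS1QA provides 9,237 question--answer pairs collected from an
introductory Python course~\cite{lee-etal-2022-cs1qa}.
\bibliographystyle{plain}  % any standard BibTeX style would work here
\bibliography{anthology}   % assumes the entry above was saved as anthology.bib
\end{document}

Compiling with pdflatex, then bibtex, then pdflatex twice resolves the citation.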
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="lee-etal-2022-cs1qa">
    <titleInfo>
      <title>CS1QA: A Dataset for Assisting Code-based Question Answering in an Introductory Programming Course</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Changyoon</namePart>
      <namePart type="family">Lee</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yeon</namePart>
      <namePart type="family">Seonwoo</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Alice</namePart>
      <namePart type="family">Oh</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2022-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</title>
      </titleInfo>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Seattle, United States</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>We introduce CS1QA, a dataset for code-based question answering in the programming education domain. CS1QA consists of 9,237 question-answer pairs gathered from chat logs in an introductory programming class using Python, and 17,698 unannotated chat entries with code. Each question is accompanied by the student’s code and the portion of the code relevant to answering the question. We carefully design the annotation process to construct CS1QA, and analyze the collected dataset in detail. The tasks for CS1QA are to predict the question type, to identify the relevant code snippet given the question and the code, and to retrieve an answer from the annotated corpus. Results for the experiments on several baseline models are reported and thoroughly analyzed. The tasks for CS1QA challenge models to understand both the code and natural language. This unique dataset can be used as a benchmark for source code comprehension and question answering in the educational setting.</abstract>
<identifier type="citekey">lee-etal-2022-cs1qa</identifier>
<identifier type="doi">10.18653/v1/2022.naacl-main.148</identifier>
<location>
<url>https://aclanthology.org/2022.naacl-main.148</url>
</location>
<part>
<date>2022-07</date>
<extent unit="page">
<start>2026</start>
<end>2040</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T CS1QA: A Dataset for Assisting Code-based Question Answering in an Introductory Programming Course
%A Lee, Changyoon
%A Seonwoo, Yeon
%A Oh, Alice
%S Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies
%D 2022
%8 July
%I Association for Computational Linguistics
%C Seattle, United States
%F lee-etal-2022-cs1qa
%X We introduce CS1QA, a dataset for code-based question answering in the programming education domain. CS1QA consists of 9,237 question-answer pairs gathered from chat logs in an introductory programming class using Python, and 17,698 unannotated chat entries with code. Each question is accompanied by the student’s code and the portion of the code relevant to answering the question. We carefully design the annotation process to construct CS1QA, and analyze the collected dataset in detail. The tasks for CS1QA are to predict the question type, to identify the relevant code snippet given the question and the code, and to retrieve an answer from the annotated corpus. Results for the experiments on several baseline models are reported and thoroughly analyzed. The tasks for CS1QA challenge models to understand both the code and natural language. This unique dataset can be used as a benchmark for source code comprehension and question answering in the educational setting.
%R 10.18653/v1/2022.naacl-main.148
%U https://aclanthology.org/2022.naacl-main.148
%U https://doi.org/10.18653/v1/2022.naacl-main.148
%P 2026-2040
Markdown (Informal)
[CS1QA: A Dataset for Assisting Code-based Question Answering in an Introductory Programming Course](https://aclanthology.org/2022.naacl-main.148) (Lee et al., NAACL 2022)
ACL
Changyoon Lee, Yeon Seonwoo, and Alice Oh. 2022. CS1QA: A Dataset for Assisting Code-based Question Answering in an Introductory Programming Course. In Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, pages 2026–2040, Seattle, United States. Association for Computational Linguistics.