@inproceedings{raja-etal-2025-dataset,
title = "A Dataset for Programming-based Instructional Video Classification and Question Answering",
author = "Raja, Sana Javaid and
Zafar, Adeel and
Shoaib, Aqsa",
editor = "Zhang, Wei Emma and
Dai, Xiang and
Elliot, Desmond and
Fang, Byron and
Sim, Mongyuan and
Zhuang, Haojie and
Chen, Weitong",
booktitle = "Proceedings of the First Workshop of Evaluation of Multi-Modal Generation",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.evalmg-1.1/",
pages = "1--9",
abstract = "This work aims to develop an understanding of the rapidly emerging field of VideoQA, particularly in the context of instructional programming videos. It also encourages the design of systems that can produce visual answers to programming-based natural language questions. We introduce two datasets: CodeVidQA, with 2,104 question-answer pair links with timestamps taken from programming videos of Stack Overflow for Programming Visual Answer Localization task, and CodeVidCL with 4,331 videos (1,751 programming, 2,580 non-programming) for Programming Video Classification task. In addition, we propose a framework that adapts BigBird and SVM for video classification techniques. The proposed approach achieves a significantly high accuracy of 99.61{\%} for video classification."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="raja-etal-2025-dataset">
<titleInfo>
<title>A Dataset for Programming-based Instructional Video Classification and Question Answering</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sana</namePart>
<namePart type="given">Javaid</namePart>
<namePart type="family">Raja</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Adeel</namePart>
<namePart type="family">Zafar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aqsa</namePart>
<namePart type="family">Shoaib</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop of Evaluation of Multi-Modal Generation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wei</namePart>
<namePart type="given">Emma</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiang</namePart>
<namePart type="family">Dai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Desmond</namePart>
<namePart type="family">Elliot</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Byron</namePart>
<namePart type="family">Fang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mongyuan</namePart>
<namePart type="family">Sim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Haojie</namePart>
<namePart type="family">Zhuang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Weitong</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This work aims to develop an understanding of the rapidly emerging field of VideoQA, particularly in the context of instructional programming videos. It also encourages the design of systems that can produce visual answers to programming-based natural language questions. We introduce two datasets: CodeVidQA, with 2,104 question-answer pair links with timestamps taken from programming videos of Stack Overflow for Programming Visual Answer Localization task, and CodeVidCL with 4,331 videos (1,751 programming, 2,580 non-programming) for Programming Video Classification task. In addition, we propose a framework that adapts BigBird and SVM for video classification techniques. The proposed approach achieves a significantly high accuracy of 99.61% for video classification.</abstract>
<identifier type="citekey">raja-etal-2025-dataset</identifier>
<location>
<url>https://aclanthology.org/2025.evalmg-1.1/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>1</start>
<end>9</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Dataset for Programming-based Instructional Video Classification and Question Answering
%A Raja, Sana Javaid
%A Zafar, Adeel
%A Shoaib, Aqsa
%Y Zhang, Wei Emma
%Y Dai, Xiang
%Y Elliot, Desmond
%Y Fang, Byron
%Y Sim, Mongyuan
%Y Zhuang, Haojie
%Y Chen, Weitong
%S Proceedings of the First Workshop of Evaluation of Multi-Modal Generation
%D 2025
%8 January
%I Association for Computational Linguistics
%C Abu Dhabi, UAE
%F raja-etal-2025-dataset
%X This work aims to develop an understanding of the rapidly emerging field of VideoQA, particularly in the context of instructional programming videos. It also encourages the design of systems that can produce visual answers to programming-based natural language questions. We introduce two datasets: CodeVidQA, with 2,104 question-answer pair links with timestamps taken from programming videos of Stack Overflow for Programming Visual Answer Localization task, and CodeVidCL with 4,331 videos (1,751 programming, 2,580 non-programming) for Programming Video Classification task. In addition, we propose a framework that adapts BigBird and SVM for video classification techniques. The proposed approach achieves a significantly high accuracy of 99.61% for video classification.
%U https://aclanthology.org/2025.evalmg-1.1/
%P 1-9
Markdown (Informal)
[A Dataset for Programming-based Instructional Video Classification and Question Answering](https://aclanthology.org/2025.evalmg-1.1/) (Raja et al., EvalMG 2025)
ACL