@inproceedings{fernandez-etal-2024-syllabusqa,
title = "{S}yllabus{QA}: A Course Logistics Question Answering Dataset",
author = "Fernandez, Nigel and
Scarlatos, Alexander and
Lan, Andrew",
editor = "Ku, Lun-Wei and
Martins, Andre and
Srikumar, Vivek",
booktitle = "Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.luhme-long.557/",
doi = "10.18653/v1/2024.acl-long.557",
pages = "10344--10369",
abstract = "Automated teaching assistants and chatbots have significant potential to reduce the workload of human instructors, especially for logistics-related question answering, which is important to students yet repetitive for instructors. However, due to privacy concerns, there is a lack of publicly available datasets. We introduce SyllabusQA, an open-source dataset with 63 real course syllabi covering 36 majors, containing 5,078 open-ended course logistics-related question-answer pairs that are diverse in both question types and answer formats. Since many logistics-related questions contain critical information like the date of an exam, it is important to evaluate the factuality of answers. We benchmark several strong baselines on this task, from large language model prompting to retrieval-augmented generation. We introduce Fact-QA, an LLM-based (GPT-4) evaluation metric to evaluate the factuality of predicted answers. We find that despite performing close to humans on traditional metrics of textual similarity, there remains a significant gap between automated approaches and humans in terms of fact precision."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="fernandez-etal-2024-syllabusqa">
<titleInfo>
<title>SyllabusQA: A Course Logistics Question Answering Dataset</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nigel</namePart>
<namePart type="family">Fernandez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexander</namePart>
<namePart type="family">Scarlatos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andrew</namePart>
<namePart type="family">Lan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lun-Wei</namePart>
<namePart type="family">Ku</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andre</namePart>
<namePart type="family">Martins</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vivek</namePart>
<namePart type="family">Srikumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Automated teaching assistants and chatbots have significant potential to reduce the workload of human instructors, especially for logistics-related question answering, which is important to students yet repetitive for instructors. However, due to privacy concerns, there is a lack of publicly available datasets. We introduce SyllabusQA, an open-source dataset with 63 real course syllabi covering 36 majors, containing 5,078 open-ended course logistics-related question-answer pairs that are diverse in both question types and answer formats. Since many logistics-related questions contain critical information like the date of an exam, it is important to evaluate the factuality of answers. We benchmark several strong baselines on this task, from large language model prompting to retrieval-augmented generation. We introduce Fact-QA, an LLM-based (GPT-4) evaluation metric to evaluate the factuality of predicted answers. We find that despite performing close to humans on traditional metrics of textual similarity, there remains a significant gap between automated approaches and humans in terms of fact precision.</abstract>
<identifier type="citekey">fernandez-etal-2024-syllabusqa</identifier>
<identifier type="doi">10.18653/v1/2024.acl-long.557</identifier>
<location>
<url>https://aclanthology.org/2024.luhme-long.557/</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>10344</start>
<end>10369</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T SyllabusQA: A Course Logistics Question Answering Dataset
%A Fernandez, Nigel
%A Scarlatos, Alexander
%A Lan, Andrew
%Y Ku, Lun-Wei
%Y Martins, Andre
%Y Srikumar, Vivek
%S Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F fernandez-etal-2024-syllabusqa
%X Automated teaching assistants and chatbots have significant potential to reduce the workload of human instructors, especially for logistics-related question answering, which is important to students yet repetitive for instructors. However, due to privacy concerns, there is a lack of publicly available datasets. We introduce SyllabusQA, an open-source dataset with 63 real course syllabi covering 36 majors, containing 5,078 open-ended course logistics-related question-answer pairs that are diverse in both question types and answer formats. Since many logistics-related questions contain critical information like the date of an exam, it is important to evaluate the factuality of answers. We benchmark several strong baselines on this task, from large language model prompting to retrieval-augmented generation. We introduce Fact-QA, an LLM-based (GPT-4) evaluation metric to evaluate the factuality of predicted answers. We find that despite performing close to humans on traditional metrics of textual similarity, there remains a significant gap between automated approaches and humans in terms of fact precision.
%R 10.18653/v1/2024.acl-long.557
%U https://aclanthology.org/2024.luhme-long.557/
%U https://doi.org/10.18653/v1/2024.acl-long.557
%P 10344-10369
Markdown (Informal)
[SyllabusQA: A Course Logistics Question Answering Dataset](https://aclanthology.org/2024.luhme-long.557/) (Fernandez et al., ACL 2024)
ACL
- Nigel Fernandez, Alexander Scarlatos, and Andrew Lan. 2024. SyllabusQA: A Course Logistics Question Answering Dataset. In Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 10344–10369, Bangkok, Thailand. Association for Computational Linguistics.