@inproceedings{maharana-bansal-2022-curriculum,
title = "On Curriculum Learning for Commonsense Reasoning",
author = "Maharana, Adyasha and
Bansal, Mohit",
editor = "Carpuat, Marine and
de Marneffe, Marie-Catherine and
Meza Ruiz, Ivan Vladimir",
booktitle = "Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies",
month = jul,
year = "2022",
address = "Seattle, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.naacl-main.72",
doi = "10.18653/v1/2022.naacl-main.72",
pages = "983--992",
abstract = "Commonsense reasoning tasks follow a standard paradigm of finetuning pretrained language models on the target task data, where samples are introduced to the model in a random order during training. However, recent research suggests that data order can have a significant impact on the performance of finetuned models for natural language understanding. Hence, we examine the effect of a human-like easy-to-difficult curriculum during finetuning of language models for commonsense reasoning tasks. We use paced curriculum learning to rank data and sample training mini-batches with increasing levels of difficulty from the ranked dataset during finetuning. Further, we investigate the effect of an adaptive curriculum, i.e., the data ranking is dynamically updated during training based on the current state of the learner model. We use a teacher model to measure difficulty of each sample and experiment with three measures based on question answering probability, variability and out-of-distribution. To understand the effectiveness of curriculum learning in various scenarios, we apply it on full model fine-tuning as well as parameter-efficient prompt-tuning settings. Our results show that fixed as well as adaptive curriculum learning significantly improve performance for five commonsense reasoning tasks, i.e., SocialIQA, CosmosQA, CODAH, HellaSwag, WinoGrande in both tuning settings. Further, we find that prioritizing the difficult samples in the tail end of training improves generalization to unseen in-domain data as well as out-of-domain data. Our work provides evidence and encourages research into curriculum learning for commonsense reasoning.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="maharana-bansal-2022-curriculum">
<titleInfo>
<title>On Curriculum Learning for Commonsense Reasoning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Adyasha</namePart>
<namePart type="family">Maharana</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marine</namePart>
<namePart type="family">Carpuat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marie-Catherine</namePart>
<namePart type="family">de Marneffe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ivan</namePart>
<namePart type="given">Vladimir</namePart>
<namePart type="family">Meza Ruiz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Seattle, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Commonsense reasoning tasks follow a standard paradigm of finetuning pretrained language models on the target task data, where samples are introduced to the model in a random order during training. However, recent research suggests that data order can have a significant impact on the performance of finetuned models for natural language understanding. Hence, we examine the effect of a human-like easy-to-difficult curriculum during finetuning of language models for commonsense reasoning tasks. We use paced curriculum learning to rank data and sample training mini-batches with increasing levels of difficulty from the ranked dataset during finetuning. Further, we investigate the effect of an adaptive curriculum, i.e., the data ranking is dynamically updated during training based on the current state of the learner model. We use a teacher model to measure difficulty of each sample and experiment with three measures based on question answering probability, variability and out-of-distribution. To understand the effectiveness of curriculum learning in various scenarios, we apply it on full model fine-tuning as well as parameter-efficient prompt-tuning settings. Our results show that fixed as well as adaptive curriculum learning significantly improve performance for five commonsense reasoning tasks, i.e., SocialIQA, CosmosQA, CODAH, HellaSwag, WinoGrande in both tuning settings. Further, we find that prioritizing the difficult samples in the tail end of training improves generalization to unseen in-domain data as well as out-of-domain data. Our work provides evidence and encourages research into curriculum learning for commonsense reasoning.</abstract>
<identifier type="citekey">maharana-bansal-2022-curriculum</identifier>
<identifier type="doi">10.18653/v1/2022.naacl-main.72</identifier>
<location>
<url>https://aclanthology.org/2022.naacl-main.72</url>
</location>
<part>
<date>2022-07</date>
<extent unit="page">
<start>983</start>
<end>992</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T On Curriculum Learning for Commonsense Reasoning
%A Maharana, Adyasha
%A Bansal, Mohit
%Y Carpuat, Marine
%Y de Marneffe, Marie-Catherine
%Y Meza Ruiz, Ivan Vladimir
%S Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies
%D 2022
%8 July
%I Association for Computational Linguistics
%C Seattle, United States
%F maharana-bansal-2022-curriculum
%X Commonsense reasoning tasks follow a standard paradigm of finetuning pretrained language models on the target task data, where samples are introduced to the model in a random order during training. However, recent research suggests that data order can have a significant impact on the performance of finetuned models for natural language understanding. Hence, we examine the effect of a human-like easy-to-difficult curriculum during finetuning of language models for commonsense reasoning tasks. We use paced curriculum learning to rank data and sample training mini-batches with increasing levels of difficulty from the ranked dataset during finetuning. Further, we investigate the effect of an adaptive curriculum, i.e., the data ranking is dynamically updated during training based on the current state of the learner model. We use a teacher model to measure difficulty of each sample and experiment with three measures based on question answering probability, variability and out-of-distribution. To understand the effectiveness of curriculum learning in various scenarios, we apply it on full model fine-tuning as well as parameter-efficient prompt-tuning settings. Our results show that fixed as well as adaptive curriculum learning significantly improve performance for five commonsense reasoning tasks, i.e., SocialIQA, CosmosQA, CODAH, HellaSwag, WinoGrande in both tuning settings. Further, we find that prioritizing the difficult samples in the tail end of training improves generalization to unseen in-domain data as well as out-of-domain data. Our work provides evidence and encourages research into curriculum learning for commonsense reasoning.
%R 10.18653/v1/2022.naacl-main.72
%U https://aclanthology.org/2022.naacl-main.72
%U https://doi.org/10.18653/v1/2022.naacl-main.72
%P 983-992
Markdown (Informal)
[On Curriculum Learning for Commonsense Reasoning](https://aclanthology.org/2022.naacl-main.72) (Maharana & Bansal, NAACL 2022)
ACL
- Adyasha Maharana and Mohit Bansal. 2022. On Curriculum Learning for Commonsense Reasoning. In Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, pages 983–992, Seattle, United States. Association for Computational Linguistics.