@inproceedings{dutt-etal-2024-investigating,
title = "Investigating the Generalizability of Pretrained Language Models across Multiple Dimensions: A Case Study of {NLI} and {MRC}",
author = "Dutt, Ritam and
Choudhury, Sagnik Ray and
Rao, Varun Venkat and
Rose, Carolyn and
Vydiswaran, V.G.Vinod",
editor = "Hupkes, Dieuwke and
Dankers, Verna and
Batsuren, Khuyagbaatar and
Kazemnejad, Amirhossein and
Christodoulopoulos, Christos and
Giulianelli, Mario and
Cotterell, Ryan",
booktitle = "Proceedings of the 2nd GenBench Workshop on Generalisation (Benchmarking) in NLP",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.genbench-1.11",
pages = "165--182",
abstract = "Generalization refers to the ability of machine learning models to perform well on dataset distributions different from the one it was trained on. While several pre-existing works have characterized the generalizability of NLP models across different dimensions, such as domain shift, adversarial perturbations, or compositional variations, most studies were carried out in a stand-alone setting, emphasizing a single dimension of interest. We bridge this gap by systematically investigating the generalizability of pre-trained language models across different architectures, sizes, and training strategies, over multiple dimensions for the task of natural language inference and question answering. Our results indicate that model instances typically exhibit consistent generalization trends, i.e., they generalize equally well (or poorly) across most scenarios, and this ability is correlated with model architecture, base dataset performance, size, and training mechanism. We hope this research motivates further work in a) developing a multi-dimensional generalization benchmark for systematic evaluation and b) examining the reasons behind models{'} generalization abilities. The code and data are available at https://github.com/sagnik/md-gen-nlp, and the trained models are released at https://huggingface.co/varun-v-rao.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="dutt-etal-2024-investigating">
<titleInfo>
<title>Investigating the Generalizability of Pretrained Language Models across Multiple Dimensions: A Case Study of NLI and MRC</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ritam</namePart>
<namePart type="family">Dutt</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sagnik</namePart>
<namePart type="given">Ray</namePart>
<namePart type="family">Choudhury</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Varun</namePart>
<namePart type="given">Venkat</namePart>
<namePart type="family">Rao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carolyn</namePart>
<namePart type="family">Rose</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">V.G.Vinod</namePart>
<namePart type="family">Vydiswaran</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2nd GenBench Workshop on Generalisation (Benchmarking) in NLP</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dieuwke</namePart>
<namePart type="family">Hupkes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Verna</namePart>
<namePart type="family">Dankers</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khuyagbaatar</namePart>
<namePart type="family">Batsuren</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amirhossein</namePart>
<namePart type="family">Kazemnejad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christos</namePart>
<namePart type="family">Christodoulopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mario</namePart>
<namePart type="family">Giulianelli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ryan</namePart>
<namePart type="family">Cotterell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Generalization refers to the ability of machine learning models to perform well on dataset distributions different from the one it was trained on. While several pre-existing works have characterized the generalizability of NLP models across different dimensions, such as domain shift, adversarial perturbations, or compositional variations, most studies were carried out in a stand-alone setting, emphasizing a single dimension of interest. We bridge this gap by systematically investigating the generalizability of pre-trained language models across different architectures, sizes, and training strategies, over multiple dimensions for the task of natural language inference and question answering. Our results indicate that model instances typically exhibit consistent generalization trends, i.e., they generalize equally well (or poorly) across most scenarios, and this ability is correlated with model architecture, base dataset performance, size, and training mechanism. We hope this research motivates further work in a) developing a multi-dimensional generalization benchmark for systematic evaluation and b) examining the reasons behind models’ generalization abilities. The code and data are available at https://github.com/sagnik/md-gen-nlp, and the trained models are released at https://huggingface.co/varun-v-rao.</abstract>
<identifier type="citekey">dutt-etal-2024-investigating</identifier>
<location>
<url>https://aclanthology.org/2024.genbench-1.11</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>165</start>
<end>182</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Investigating the Generalizability of Pretrained Language Models across Multiple Dimensions: A Case Study of NLI and MRC
%A Dutt, Ritam
%A Choudhury, Sagnik Ray
%A Rao, Varun Venkat
%A Rose, Carolyn
%A Vydiswaran, V.G.Vinod
%Y Hupkes, Dieuwke
%Y Dankers, Verna
%Y Batsuren, Khuyagbaatar
%Y Kazemnejad, Amirhossein
%Y Christodoulopoulos, Christos
%Y Giulianelli, Mario
%Y Cotterell, Ryan
%S Proceedings of the 2nd GenBench Workshop on Generalisation (Benchmarking) in NLP
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F dutt-etal-2024-investigating
%X Generalization refers to the ability of machine learning models to perform well on dataset distributions different from the one it was trained on. While several pre-existing works have characterized the generalizability of NLP models across different dimensions, such as domain shift, adversarial perturbations, or compositional variations, most studies were carried out in a stand-alone setting, emphasizing a single dimension of interest. We bridge this gap by systematically investigating the generalizability of pre-trained language models across different architectures, sizes, and training strategies, over multiple dimensions for the task of natural language inference and question answering. Our results indicate that model instances typically exhibit consistent generalization trends, i.e., they generalize equally well (or poorly) across most scenarios, and this ability is correlated with model architecture, base dataset performance, size, and training mechanism. We hope this research motivates further work in a) developing a multi-dimensional generalization benchmark for systematic evaluation and b) examining the reasons behind models’ generalization abilities. The code and data are available at https://github.com/sagnik/md-gen-nlp, and the trained models are released at https://huggingface.co/varun-v-rao.
%U https://aclanthology.org/2024.genbench-1.11
%P 165-182
Markdown (Informal)
[Investigating the Generalizability of Pretrained Language Models across Multiple Dimensions: A Case Study of NLI and MRC](https://aclanthology.org/2024.genbench-1.11) (Dutt et al., GenBench 2024)
ACL