% Workshop paper by Wilson and Frank (2023), pages 152--162 of the
% GenBench workshop proceedings. All values are brace-delimited; the
% month uses the predefined three-letter macro so styles can localise it.
@inproceedings{wilson-frank-2023-inductive,
  title     = {Inductive Bias Is in the Eye of the Beholder},
  author    = {Wilson, Michael and Frank, Robert},
  editor    = {Hupkes, Dieuwke and
               Dankers, Verna and
               Batsuren, Khuyagbaatar and
               Sinha, Koustuv and
               Kazemnejad, Amirhossein and
               Christodoulopoulos, Christos and
               Cotterell, Ryan and
               Bruni, Elia},
  booktitle = {Proceedings of the 1st GenBench Workshop on (Benchmarking) Generalisation in NLP},
  month     = dec,
  year      = {2023},
  address   = {Singapore},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2023.genbench-1.12},
  doi       = {10.18653/v1/2023.genbench-1.12},
  pages     = {152--162},
  abstract  = {Due to the finite nature of any evidence used in learning, systematic generalization is crucially reliant on the presence of inductive bias (Mitchell, 1980). We examine inductive biases in different types of sequence-to-sequence neural network models, including CNNs, LSTMs (with and without attention), and transformers, inspired by Kharitonov and Chaabouni (2021). Crucially, however, we consider a wider range of possible inductive biases than their study did. Investigating preferences for hierarchical generalization compared to other types of generalization, we find that, contrary to their results, transformers display no preference for hierarchical generalization, but instead prefer a counting strategy. We also investigate biases toward different types of compositionality. By controlling for a confound in Kharitonov and Chaabouni (2021){'}s test set, we find much less consistent generalization overall, and find that a large number of responses were among types other than the two types of generalization they had considered. Nevertheless, we observe consistent compositional generalization to held out combinations of primitives and functions on a SCAN task (Lake and Baroni, 2017) by models of all types, but only when primitives occur with other functions in the training set. The pattern of success indicates generalization in models of these types is highly sensitive to distributional properties of their training data.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record for the paper. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="wilson-frank-2023-inductive">
    <!-- Title of the paper itself. -->
    <titleInfo><title>Inductive Bias Is in the Eye of the Beholder</title></titleInfo>

    <!-- Authors, in the order they are listed. -->
    <name type="personal">
      <namePart type="given">Michael</namePart>
      <namePart type="family">Wilson</namePart>
      <role><roleTerm authority="marcrelator" type="text">author</roleTerm></role>
    </name>
    <name type="personal">
      <namePart type="given">Robert</namePart>
      <namePart type="family">Frank</namePart>
      <role><roleTerm authority="marcrelator" type="text">author</roleTerm></role>
    </name>

    <!-- Issue date of the paper (year-month). -->
    <originInfo>
      <dateIssued>2023-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume, its editors, and its publisher. -->
    <relatedItem type="host">
      <titleInfo><title>Proceedings of the 1st GenBench Workshop on (Benchmarking) Generalisation in NLP</title></titleInfo>
      <name type="personal">
        <namePart type="given">Dieuwke</namePart>
        <namePart type="family">Hupkes</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Verna</namePart>
        <namePart type="family">Dankers</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Khuyagbaatar</namePart>
        <namePart type="family">Batsuren</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Koustuv</namePart>
        <namePart type="family">Sinha</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Amirhossein</namePart>
        <namePart type="family">Kazemnejad</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Christos</namePart>
        <namePart type="family">Christodoulopoulos</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Ryan</namePart>
        <namePart type="family">Cotterell</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Elia</namePart>
        <namePart type="family">Bruni</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place><placeTerm type="text">Singapore</placeTerm></place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>

    <abstract>Due to the finite nature of any evidence used in learning, systematic generalization is crucially reliant on the presence of inductive bias (Mitchell, 1980). We examine inductive biases in different types of sequence-to-sequence neural network models, including CNNs, LSTMs (with and without attention), and transformers, inspired by Kharitonov and Chaabouni (2021). Crucially, however, we consider a wider range of possible inductive biases than their study did. Investigating preferences for hierarchical generalization compared to other types of generalization, we find that, contrary to their results, transformers display no preference for hierarchical generalization, but instead prefer a counting strategy. We also investigate biases toward different types of compositionality. By controlling for a confound in Kharitonov and Chaabouni (2021)’s test set, we find much less consistent generalization overall, and find that a large number of responses were among types other than the two types of generalization they had considered. Nevertheless, we observe consistent compositional generalization to held out combinations of primitives and functions on a SCAN task (Lake and Baroni, 2017) by models of all types, but only when primitives occur with other functions in the training set. The pattern of success indicates generalization in models of these types is highly sensitive to distributional properties of their training data.</abstract>

    <!-- Identifiers and landing page. -->
    <identifier type="citekey">wilson-frank-2023-inductive</identifier>
    <identifier type="doi">10.18653/v1/2023.genbench-1.12</identifier>
    <location><url>https://aclanthology.org/2023.genbench-1.12</url></location>

    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2023-12</date>
      <extent unit="page">
        <start>152</start>
        <end>162</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Inductive Bias Is in the Eye of the Beholder
%A Wilson, Michael
%A Frank, Robert
%Y Hupkes, Dieuwke
%Y Dankers, Verna
%Y Batsuren, Khuyagbaatar
%Y Sinha, Koustuv
%Y Kazemnejad, Amirhossein
%Y Christodoulopoulos, Christos
%Y Cotterell, Ryan
%Y Bruni, Elia
%S Proceedings of the 1st GenBench Workshop on (Benchmarking) Generalisation in NLP
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F wilson-frank-2023-inductive
%X Due to the finite nature of any evidence used in learning, systematic generalization is crucially reliant on the presence of inductive bias (Mitchell, 1980). We examine inductive biases in different types of sequence-to-sequence neural network models, including CNNs, LSTMs (with and without attention), and transformers, inspired by Kharitonov and Chaabouni (2021). Crucially, however, we consider a wider range of possible inductive biases than their study did. Investigating preferences for hierarchical generalization compared to other types of generalization, we find that, contrary to their results, transformers display no preference for hierarchical generalization, but instead prefer a counting strategy. We also investigate biases toward different types of compositionality. By controlling for a confound in Kharitonov and Chaabouni (2021)’s test set, we find much less consistent generalization overall, and find that a large number of responses were among types other than the two types of generalization they had considered. Nevertheless, we observe consistent compositional generalization to held out combinations of primitives and functions on a SCAN task (Lake and Baroni, 2017) by models of all types, but only when primitives occur with other functions in the training set. The pattern of success indicates generalization in models of these types is highly sensitive to distributional properties of their training data.
%R 10.18653/v1/2023.genbench-1.12
%U https://aclanthology.org/2023.genbench-1.12
%U https://doi.org/10.18653/v1/2023.genbench-1.12
%P 152-162
Markdown (Informal)
[Inductive Bias Is in the Eye of the Beholder](https://aclanthology.org/2023.genbench-1.12) (Wilson & Frank, GenBench-WS 2023)
ACL
- Michael Wilson and Robert Frank. 2023. Inductive Bias Is in the Eye of the Beholder. In Proceedings of the 1st GenBench Workshop on (Benchmarking) Generalisation in NLP, pages 152–162, Singapore. Association for Computational Linguistics.