@inproceedings{porada-etal-2022-pre,
title = "Does Pre-training Induce Systematic Inference? How Masked Language Models Acquire Commonsense Knowledge",
author = "Porada, Ian and
Sordoni, Alessandro and
Cheung, Jackie",
booktitle = "Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies",
month = jul,
year = "2022",
address = "Seattle, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.naacl-main.337",
doi = "10.18653/v1/2022.naacl-main.337",
pages = "4550--4557",
abstract = "Transformer models pre-trained with a masked-language-modeling objective (e.g., BERT) encode commonsense knowledge as evidenced by behavioral probes; however, the extent to which this knowledge is acquired by systematic inference over the semantics of the pre-training corpora is an open question. To answer this question, we selectively inject verbalized knowledge into the pre-training minibatches of BERT and evaluate how well the model generalizes to supported inferences after pre-training on the injected knowledge. We find generalization does not improve over the course of pre-training BERT from scratch, suggesting that commonsense knowledge is acquired from surface-level, co-occurrence patterns rather than induced, systematic reasoning.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="porada-etal-2022-pre">
    <titleInfo>
      <title>Does Pre-training Induce Systematic Inference? How Masked Language Models Acquire Commonsense Knowledge</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Ian</namePart>
      <namePart type="family">Porada</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Alessandro</namePart>
      <namePart type="family">Sordoni</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jackie</namePart>
      <namePart type="family">Cheung</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2022-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</title>
      </titleInfo>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Seattle, United States</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Transformer models pre-trained with a masked-language-modeling objective (e.g., BERT) encode commonsense knowledge as evidenced by behavioral probes; however, the extent to which this knowledge is acquired by systematic inference over the semantics of the pre-training corpora is an open question. To answer this question, we selectively inject verbalized knowledge into the pre-training minibatches of BERT and evaluate how well the model generalizes to supported inferences after pre-training on the injected knowledge. We find generalization does not improve over the course of pre-training BERT from scratch, suggesting that commonsense knowledge is acquired from surface-level, co-occurrence patterns rather than induced, systematic reasoning.</abstract>
    <identifier type="citekey">porada-etal-2022-pre</identifier>
    <identifier type="doi">10.18653/v1/2022.naacl-main.337</identifier>
    <location>
      <url>https://aclanthology.org/2022.naacl-main.337</url>
    </location>
    <part>
      <date>2022-07</date>
      <extent unit="page">
        <start>4550</start>
        <end>4557</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Does Pre-training Induce Systematic Inference? How Masked Language Models Acquire Commonsense Knowledge
%A Porada, Ian
%A Sordoni, Alessandro
%A Cheung, Jackie
%S Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies
%D 2022
%8 July
%I Association for Computational Linguistics
%C Seattle, United States
%F porada-etal-2022-pre
%X Transformer models pre-trained with a masked-language-modeling objective (e.g., BERT) encode commonsense knowledge as evidenced by behavioral probes; however, the extent to which this knowledge is acquired by systematic inference over the semantics of the pre-training corpora is an open question. To answer this question, we selectively inject verbalized knowledge into the pre-training minibatches of BERT and evaluate how well the model generalizes to supported inferences after pre-training on the injected knowledge. We find generalization does not improve over the course of pre-training BERT from scratch, suggesting that commonsense knowledge is acquired from surface-level, co-occurrence patterns rather than induced, systematic reasoning.
%R 10.18653/v1/2022.naacl-main.337
%U https://aclanthology.org/2022.naacl-main.337
%U https://doi.org/10.18653/v1/2022.naacl-main.337
%P 4550-4557
Markdown (Informal)
[Does Pre-training Induce Systematic Inference? How Masked Language Models Acquire Commonsense Knowledge](https://aclanthology.org/2022.naacl-main.337) (Porada et al., NAACL 2022)
ACL
Ian Porada, Alessandro Sordoni, and Jackie Cheung. 2022. Does Pre-training Induce Systematic Inference? How Masked Language Models Acquire Commonsense Knowledge. In Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, pages 4550–4557, Seattle, United States. Association for Computational Linguistics.
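
To make the setup described in the abstract concrete, the sketch below illustrates the general idea: mix verbalized knowledge sentences into masked-language-modeling minibatches, then probe for an inference those sentences support. It is a minimal sketch assuming the Hugging Face transformers stack; the sentences, probe, hyperparameters, and helper names are illustrative placeholders, not the authors' actual code, data, or training schedule.

```python
# Illustrative sketch (not the authors' code): inject "verbalized knowledge"
# into BERT MLM minibatches, then behaviorally probe a supported inference.
import random
import torch
from transformers import BertForMaskedLM, BertTokenizerFast

tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased")
model = BertForMaskedLM.from_pretrained("bert-base-uncased")  # the paper pre-trains from scratch
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)

corpus_sentences = ["the cat sat on the mat."]                       # stand-in for the pre-training corpus
injected_knowledge = ["a robin is a bird.", "birds have feathers."]  # stand-in for verbalized knowledge

def mlm_mask(sentences, mask_prob=0.15):
    """Standard MLM masking: hide a random subset of tokens, keep labels only there."""
    inputs = tokenizer(sentences, return_tensors="pt", padding=True)
    labels = inputs.input_ids.clone()
    special = (
        (labels == tokenizer.pad_token_id)
        | (labels == tokenizer.cls_token_id)
        | (labels == tokenizer.sep_token_id)
    )
    mask = (torch.rand(labels.shape) < mask_prob) & ~special
    inputs.input_ids[mask] = tokenizer.mask_token_id
    labels[~mask] = -100  # loss is computed on masked positions only
    return inputs, labels

model.train()
for step in range(10):  # tiny illustrative loop, not a real pre-training schedule
    # each minibatch mixes ordinary corpus text with injected knowledge sentences
    batch = random.sample(corpus_sentences, 1) + random.sample(injected_knowledge, 1)
    inputs, labels = mlm_mask(batch)
    if (labels != -100).any():  # skip the rare batch where nothing got masked
        loss = model(**inputs, labels=labels).loss
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

# Behavioral probe for an inference the injected facts support.
model.eval()
enc = tokenizer("a robin has [MASK].", return_tensors="pt")
with torch.no_grad():
    logits = model(**enc).logits
mask_pos = (enc.input_ids[0] == tokenizer.mask_token_id).nonzero(as_tuple=True)[0].item()
print(tokenizer.decode([int(logits[0, mask_pos].argmax())]))
```

The paper's finding is about how such probes behave over the course of pre-training from scratch; this toy loop only shows where the injection and the probe would sit in an MLM training pipeline.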