@inproceedings{shen-etal-2024-bambino,
title = "{BAMBINO}-{LM}: (Bilingual-)Human-Inspired Continual Pre-training of {B}aby{LM}",
author = "Shen, Zhewen and
Joshi, Aditya and
Chen, Ruey-Cheng",
editor = "Kuribayashi, Tatsuki and
Rambelli, Giulia and
Takmaz, Ece and
Wicke, Philipp and
Oseki, Yohei",
booktitle = "Proceedings of the Workshop on Cognitive Modeling and Computational Linguistics",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.cmcl-1.1",
doi = "10.18653/v1/2024.cmcl-1.1",
pages = "1--7",
abstract = "Children from bilingual backgrounds benefit from interactions with parents and teachers to re-acquire their heritage language. In this paper, we investigate how this insight from behavioral study can be incorporated into the learning of small-scale language models. We introduce BAMBINO-LM, a continual pre-training strategy for BabyLM that uses a novel combination of alternation and PPO-based perplexity reward induced from a parent Italian model. Upon evaluation on zero-shot classification tasks for English and Italian, BAMBINO-LM improves the Italian language capability of a BabyLM baseline. Our ablation analysis demonstrates that employing both the alternation strategy and PPO-based modeling is key to this effectiveness gain. We also show that, as a side effect, the proposed method leads to a similar degradation in L1 effectiveness as human children would have had in an equivalent learning scenario. Through its modeling and findings, BAMBINO-LM makes a focused contribution to the pre-training of small-scale language models by first developing a human-inspired strategy for pre-training and then showing that it results in behaviours similar to that of humans.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="shen-etal-2024-bambino">
<titleInfo>
<title>BAMBINO-LM: (Bilingual-)Human-Inspired Continual Pre-training of BabyLM</title>
</titleInfo>
<name type="personal">
<namePart type="given">Zhewen</namePart>
<namePart type="family">Shen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aditya</namePart>
<namePart type="family">Joshi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruey-Cheng</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Workshop on Cognitive Modeling and Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tatsuki</namePart>
<namePart type="family">Kuribayashi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Giulia</namePart>
<namePart type="family">Rambelli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ece</namePart>
<namePart type="family">Takmaz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philipp</namePart>
<namePart type="family">Wicke</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yohei</namePart>
<namePart type="family">Oseki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Children from bilingual backgrounds benefit from interactions with parents and teachers to re-acquire their heritage language. In this paper, we investigate how this insight from behavioral study can be incorporated into the learning of small-scale language models. We introduce BAMBINO-LM, a continual pre-training strategy for BabyLM that uses a novel combination of alternation and PPO-based perplexity reward induced from a parent Italian model. Upon evaluation on zero-shot classification tasks for English and Italian, BAMBINO-LM improves the Italian language capability of a BabyLM baseline. Our ablation analysis demonstrates that employing both the alternation strategy and PPO-based modeling is key to this effectiveness gain. We also show that, as a side effect, the proposed method leads to a similar degradation in L1 effectiveness as human children would have had in an equivalent learning scenario. Through its modeling and findings, BAMBINO-LM makes a focused contribution to the pre-training of small-scale language models by first developing a human-inspired strategy for pre-training and then showing that it results in behaviours similar to that of humans.</abstract>
<identifier type="citekey">shen-etal-2024-bambino</identifier>
<identifier type="doi">10.18653/v1/2024.cmcl-1.1</identifier>
<location>
<url>https://aclanthology.org/2024.cmcl-1.1</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>1</start>
<end>7</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T BAMBINO-LM: (Bilingual-)Human-Inspired Continual Pre-training of BabyLM
%A Shen, Zhewen
%A Joshi, Aditya
%A Chen, Ruey-Cheng
%Y Kuribayashi, Tatsuki
%Y Rambelli, Giulia
%Y Takmaz, Ece
%Y Wicke, Philipp
%Y Oseki, Yohei
%S Proceedings of the Workshop on Cognitive Modeling and Computational Linguistics
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F shen-etal-2024-bambino
%X Children from bilingual backgrounds benefit from interactions with parents and teachers to re-acquire their heritage language. In this paper, we investigate how this insight from behavioral study can be incorporated into the learning of small-scale language models. We introduce BAMBINO-LM, a continual pre-training strategy for BabyLM that uses a novel combination of alternation and PPO-based perplexity reward induced from a parent Italian model. Upon evaluation on zero-shot classification tasks for English and Italian, BAMBINO-LM improves the Italian language capability of a BabyLM baseline. Our ablation analysis demonstrates that employing both the alternation strategy and PPO-based modeling is key to this effectiveness gain. We also show that, as a side effect, the proposed method leads to a similar degradation in L1 effectiveness as human children would have had in an equivalent learning scenario. Through its modeling and findings, BAMBINO-LM makes a focused contribution to the pre-training of small-scale language models by first developing a human-inspired strategy for pre-training and then showing that it results in behaviours similar to that of humans.
%R 10.18653/v1/2024.cmcl-1.1
%U https://aclanthology.org/2024.cmcl-1.1
%U https://doi.org/10.18653/v1/2024.cmcl-1.1
%P 1-7
Markdown (Informal)
[BAMBINO-LM: (Bilingual-)Human-Inspired Continual Pre-training of BabyLM](https://aclanthology.org/2024.cmcl-1.1) (Shen et al., CMCL-WS 2024)
ACL