@inproceedings{bunzeck-zarriess-2024-fifty,
title = "Fifty shapes of {BL}i{MP}: syntactic learning curves in language models are not uniform, but sometimes unruly",
author = "Bunzeck, Bastian and
Zarrie{\ss}, Sina",
editor = "Qiu, Amy and
Noble, Bill and
Pagmar, David and
Maraev, Vladislav and
Ilinykh, Nikolai",
booktitle = "Proceedings of the 2024 CLASP Conference on Multimodality and Interaction in Language Learning",
month = oct,
year = "2024",
address = "Gothenburg, Sweden",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.clasp-1.7",
pages = "39--55",
abstract = "Syntactic learning curves in LMs are usually reported as relatively stable and power law-shaped. By analyzing the learning curves of different LMs on various syntactic phenomena using both small self-trained llama models and larger pre-trained pythia models, we show that while many phenomena do follow typical power law curves, others exhibit S-shaped, U-shaped, or erratic patterns. Certain syntactic paradigms remain challenging even for large models, resulting in persistent preference for ungrammatical sentences. Most phenomena show similar curves for their paradigms, but the existence of diverging patterns and oscillations indicates that average curves mask important developments, underscoring the need for more detailed analyses of individual learning trajectories.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bunzeck-zarriess-2024-fifty">
<titleInfo>
<title>Fifty shapes of BLiMP: syntactic learning curves in language models are not uniform, but sometimes unruly</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bastian</namePart>
<namePart type="family">Bunzeck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sina</namePart>
<namePart type="family">Zarrieß</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-10</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 CLASP Conference on Multimodality and Interaction in Language Learning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Amy</namePart>
<namePart type="family">Qiu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bill</namePart>
<namePart type="family">Noble</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Pagmar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vladislav</namePart>
<namePart type="family">Maraev</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nikolai</namePart>
<namePart type="family">Ilinykh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Gothenburg, Sweden</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Syntactic learning curves in LMs are usually reported as relatively stable and power law-shaped. By analyzing the learning curves of different LMs on various syntactic phenomena using both small self-trained llama models and larger pre-trained pythia models, we show that while many phenomena do follow typical power law curves, others exhibit S-shaped, U-shaped, or erratic patterns. Certain syntactic paradigms remain challenging even for large models, resulting in persistent preference for ungrammatical sentences. Most phenomena show similar curves for their paradigms, but the existence of diverging patterns and oscillations indicates that average curves mask important developments, underscoring the need for more detailed analyses of individual learning trajectories.</abstract>
<identifier type="citekey">bunzeck-zarriess-2024-fifty</identifier>
<location>
<url>https://aclanthology.org/2024.clasp-1.7</url>
</location>
<part>
<date>2024-10</date>
<extent unit="page">
<start>39</start>
<end>55</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Fifty shapes of BLiMP: syntactic learning curves in language models are not uniform, but sometimes unruly
%A Bunzeck, Bastian
%A Zarrieß, Sina
%Y Qiu, Amy
%Y Noble, Bill
%Y Pagmar, David
%Y Maraev, Vladislav
%Y Ilinykh, Nikolai
%S Proceedings of the 2024 CLASP Conference on Multimodality and Interaction in Language Learning
%D 2024
%8 October
%I Association for Computational Linguistics
%C Gothenburg, Sweden
%F bunzeck-zarriess-2024-fifty
%X Syntactic learning curves in LMs are usually reported as relatively stable and power law-shaped. By analyzing the learning curves of different LMs on various syntactic phenomena using both small self-trained llama models and larger pre-trained pythia models, we show that while many phenomena do follow typical power law curves, others exhibit S-shaped, U-shaped, or erratic patterns. Certain syntactic paradigms remain challenging even for large models, resulting in persistent preference for ungrammatical sentences. Most phenomena show similar curves for their paradigms, but the existence of diverging patterns and oscillations indicates that average curves mask important developments, underscoring the need for more detailed analyses of individual learning trajectories.
%U https://aclanthology.org/2024.clasp-1.7
%P 39-55
Markdown (Informal)
[Fifty shapes of BLiMP: syntactic learning curves in language models are not uniform, but sometimes unruly](https://aclanthology.org/2024.clasp-1.7) (Bunzeck & Zarrieß, CLASP 2024)
ACL