@inproceedings{teehan-etal-2022-emergent,
title = "Emergent Structures and Training Dynamics in Large Language Models",
author = "Teehan, Ryan and
Clinciu, Miruna and
Serikov, Oleg and
Szczechla, Eliza and
Seelam, Natasha and
Mirkin, Shachar and
Gokaslan, Aaron",
editor = "Fan, Angela and
Ilic, Suzana and
Wolf, Thomas and
Gall{\'e}, Matthias",
booktitle = "Proceedings of BigScience Episode {\#}5 -- Workshop on Challenges {\&} Perspectives in Creating Large Language Models",
month = may,
year = "2022",
address = "virtual+Dublin",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.bigscience-1.11/",
doi = "10.18653/v1/2022.bigscience-1.11",
pages = "146--159",
abstract = "Large language models have achieved success on a number of downstream tasks, particularly in a few and zero-shot manner. As a consequence, researchers have been investigating both the kind of information these networks learn and how such information can be encoded in the parameters of the model. We survey the literature on changes in the network during training, drawing from work outside of NLP when necessary, and on learned representations of linguistic features in large language models. We note in particular the lack of sufficient research on the emergence of functional units, subsections of the network where related functions are grouped or organised, within large language models and motivate future work that grounds the study of language models in an analysis of their changing internal structure during training time."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="teehan-etal-2022-emergent">
<titleInfo>
<title>Emergent Structures and Training Dynamics in Large Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ryan</namePart>
<namePart type="family">Teehan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Miruna</namePart>
<namePart type="family">Clinciu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Oleg</namePart>
<namePart type="family">Serikov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eliza</namePart>
<namePart type="family">Szczechla</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Natasha</namePart>
<namePart type="family">Seelam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shachar</namePart>
<namePart type="family">Mirkin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aaron</namePart>
<namePart type="family">Gokaslan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of BigScience Episode #5 – Workshop on Challenges &amp; Perspectives in Creating Large Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Angela</namePart>
<namePart type="family">Fan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Suzana</namePart>
<namePart type="family">Ilic</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thomas</namePart>
<namePart type="family">Wolf</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matthias</namePart>
<namePart type="family">Gallé</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">virtual+Dublin</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Large language models have achieved success on a number of downstream tasks, particularly in a few and zero-shot manner. As a consequence, researchers have been investigating both the kind of information these networks learn and how such information can be encoded in the parameters of the model. We survey the literature on changes in the network during training, drawing from work outside of NLP when necessary, and on learned representations of linguistic features in large language models. We note in particular the lack of sufficient research on the emergence of functional units, subsections of the network where related functions are grouped or organised, within large language models and motivate future work that grounds the study of language models in an analysis of their changing internal structure during training time.</abstract>
<identifier type="citekey">teehan-etal-2022-emergent</identifier>
<identifier type="doi">10.18653/v1/2022.bigscience-1.11</identifier>
<location>
<url>https://aclanthology.org/2022.bigscience-1.11/</url>
</location>
<part>
<date>2022-05</date>
<extent unit="page">
<start>146</start>
<end>159</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Emergent Structures and Training Dynamics in Large Language Models
%A Teehan, Ryan
%A Clinciu, Miruna
%A Serikov, Oleg
%A Szczechla, Eliza
%A Seelam, Natasha
%A Mirkin, Shachar
%A Gokaslan, Aaron
%Y Fan, Angela
%Y Ilic, Suzana
%Y Wolf, Thomas
%Y Gallé, Matthias
%S Proceedings of BigScience Episode #5 – Workshop on Challenges & Perspectives in Creating Large Language Models
%D 2022
%8 May
%I Association for Computational Linguistics
%C virtual+Dublin
%F teehan-etal-2022-emergent
%X Large language models have achieved success on a number of downstream tasks, particularly in a few and zero-shot manner. As a consequence, researchers have been investigating both the kind of information these networks learn and how such information can be encoded in the parameters of the model. We survey the literature on changes in the network during training, drawing from work outside of NLP when necessary, and on learned representations of linguistic features in large language models. We note in particular the lack of sufficient research on the emergence of functional units, subsections of the network where related functions are grouped or organised, within large language models and motivate future work that grounds the study of language models in an analysis of their changing internal structure during training time.
%R 10.18653/v1/2022.bigscience-1.11
%U https://aclanthology.org/2022.bigscience-1.11/
%U https://doi.org/10.18653/v1/2022.bigscience-1.11
%P 146-159
Markdown (Informal)
[Emergent Structures and Training Dynamics in Large Language Models](https://aclanthology.org/2022.bigscience-1.11/) (Teehan et al., BigScience 2022)
ACL
- Ryan Teehan, Miruna Clinciu, Oleg Serikov, Eliza Szczechla, Natasha Seelam, Shachar Mirkin, and Aaron Gokaslan. 2022. Emergent Structures and Training Dynamics in Large Language Models. In Proceedings of BigScience Episode #5 – Workshop on Challenges & Perspectives in Creating Large Language Models, pages 146–159, virtual+Dublin. Association for Computational Linguistics.