@inproceedings{voloshina-serikov-2024-critical,
title = "Critical Size Hypothesis: How Model Hyperparameters Correlate with Its Linguistic Abilities",
author = "Voloshina, Ekaterina and
Serikov, Oleg",
editor = "Qiu, Amy and
Noble, Bill and
Pagmar, David and
Maraev, Vladislav and
Ilinykh, Nikolai",
booktitle = "Proceedings of the 2024 CLASP Conference on Multimodality and Interaction in Language Learning",
month = oct,
year = "2024",
address = "Gothenburg, Sweden",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.clasp-1.1",
pages = "1--7",
abstract = "In recent years, the models were tested on different probing tasks to examine their language knowledge. However, few researchers explored the very process of models{'} language acquisition. Nevertheless, the analysis of language acquisition during training could shed light on the model parameters that help to acquire the language faster. In this work, we experiment with model hyperparameters and reveal that the hidden size is the most essential factor for model language acquisition.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="voloshina-serikov-2024-critical">
<titleInfo>
<title>Critical Size Hypothesis: How Model Hyperparameters Correlate with Its Linguistic Abilities</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Voloshina</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Oleg</namePart>
<namePart type="family">Serikov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-10</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 CLASP Conference on Multimodality and Interaction in Language Learning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Amy</namePart>
<namePart type="family">Qiu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bill</namePart>
<namePart type="family">Noble</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Pagmar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vladislav</namePart>
<namePart type="family">Maraev</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nikolai</namePart>
<namePart type="family">Ilinykh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Gothenburg, Sweden</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In recent years, the models were tested on different probing tasks to examine their language knowledge. However, few researchers explored the very process of models’ language acquisition. Nevertheless, the analysis of language acquisition during training could shed light on the model parameters that help to acquire the language faster. In this work, we experiment with model hyperparameters and reveal that the hidden size is the most essential factor for model language acquisition.</abstract>
<identifier type="citekey">voloshina-serikov-2024-critical</identifier>
<location>
<url>https://aclanthology.org/2024.clasp-1.1</url>
</location>
<part>
<date>2024-10</date>
<extent unit="page">
<start>1</start>
<end>7</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Critical Size Hypothesis: How Model Hyperparameters Correlate with Its Linguistic Abilities
%A Voloshina, Ekaterina
%A Serikov, Oleg
%Y Qiu, Amy
%Y Noble, Bill
%Y Pagmar, David
%Y Maraev, Vladislav
%Y Ilinykh, Nikolai
%S Proceedings of the 2024 CLASP Conference on Multimodality and Interaction in Language Learning
%D 2024
%8 October
%I Association for Computational Linguistics
%C Gothenburg, Sweden
%F voloshina-serikov-2024-critical
%X In recent years, models have been tested on various probing tasks to examine their knowledge of language. However, few researchers have explored the process by which models acquire language during training. Yet analyzing this process could shed light on which hyperparameters help a model acquire language faster. In this work, we experiment with model hyperparameters and find that hidden size is the most important factor in a model’s language acquisition.
%U https://aclanthology.org/2024.clasp-1.1
%P 1-7
Markdown (Informal)
[Critical Size Hypothesis: How Model Hyperparameters Correlate with Its Linguistic Abilities](https://aclanthology.org/2024.clasp-1.1) (Voloshina & Serikov, CLASP 2024)
ACL
Ekaterina Voloshina and Oleg Serikov. 2024. Critical Size Hypothesis: How Model Hyperparameters Correlate with Its Linguistic Abilities. In Proceedings of the 2024 CLASP Conference on Multimodality and Interaction in Language Learning, pages 1–7, Gothenburg, Sweden. Association for Computational Linguistics.