@inproceedings{wennberg-henter-2024-exploring,
    title = "Exploring Internal Numeracy in Language Models: A Case Study on {ALBERT}",
    author = "Wennberg, Ulme and
      Henter, Gustav Eje",
    editor = "Valentino, Marco and
      Ferreira, Deborah and
      Thayaparan, Mokanarangan and
      Freitas, Andre",
    booktitle = "Proceedings of the 2nd Workshop on Mathematical Natural Language Processing @ LREC-COLING 2024",
    month = may,
    year = "2024",
    address = "Torino, Italia",
    publisher = "ELRA and ICCL",
    url = "https://aclanthology.org/2024.mathnlp-1.5",
    pages = "35--40",
    abstract = "It has been found that Transformer-based language models have the ability to perform basic quantitative reasoning. In this paper, we propose a method for studying how these models internally represent numerical data, and use our proposal to analyze the ALBERT family of language models. Specifically, we extract the learned embeddings these models use to represent tokens that correspond to numbers and ordinals, and subject these embeddings to Principal Component Analysis (PCA). PCA results reveal that ALBERT models of different sizes, trained and initialized separately, consistently learn to use the axes of greatest variation to represent the approximate ordering of various numerical concepts. Numerals and their textual counterparts are represented in separate clusters, but increase along the same direction in 2D space. Our findings illustrate that language models, trained purely to model text, can intuit basic mathematical concepts, opening avenues for NLP applications that intersect with quantitative reasoning.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="wennberg-henter-2024-exploring">
    <titleInfo>
      <title>Exploring Internal Numeracy in Language Models: A Case Study on ALBERT</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Ulme</namePart>
      <namePart type="family">Wennberg</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Gustav</namePart>
      <namePart type="given">Eje</namePart>
      <namePart type="family">Henter</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2nd Workshop on Mathematical Natural Language Processing @ LREC-COLING 2024</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Marco</namePart>
        <namePart type="family">Valentino</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Deborah</namePart>
        <namePart type="family">Ferreira</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mokanarangan</namePart>
        <namePart type="family">Thayaparan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Andre</namePart>
        <namePart type="family">Freitas</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>ELRA and ICCL</publisher>
        <place>
          <placeTerm type="text">Torino, Italia</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>It has been found that Transformer-based language models have the ability to perform basic quantitative reasoning. In this paper, we propose a method for studying how these models internally represent numerical data, and use our proposal to analyze the ALBERT family of language models. Specifically, we extract the learned embeddings these models use to represent tokens that correspond to numbers and ordinals, and subject these embeddings to Principal Component Analysis (PCA). PCA results reveal that ALBERT models of different sizes, trained and initialized separately, consistently learn to use the axes of greatest variation to represent the approximate ordering of various numerical concepts. Numerals and their textual counterparts are represented in separate clusters, but increase along the same direction in 2D space. Our findings illustrate that language models, trained purely to model text, can intuit basic mathematical concepts, opening avenues for NLP applications that intersect with quantitative reasoning.</abstract>
    <identifier type="citekey">wennberg-henter-2024-exploring</identifier>
    <location>
      <url>https://aclanthology.org/2024.mathnlp-1.5</url>
    </location>
    <part>
      <date>2024-05</date>
      <extent unit="page">
        <start>35</start>
        <end>40</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Exploring Internal Numeracy in Language Models: A Case Study on ALBERT
%A Wennberg, Ulme
%A Henter, Gustav Eje
%Y Valentino, Marco
%Y Ferreira, Deborah
%Y Thayaparan, Mokanarangan
%Y Freitas, Andre
%S Proceedings of the 2nd Workshop on Mathematical Natural Language Processing @ LREC-COLING 2024
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F wennberg-henter-2024-exploring
%X It has been found that Transformer-based language models have the ability to perform basic quantitative reasoning. In this paper, we propose a method for studying how these models internally represent numerical data, and use our proposal to analyze the ALBERT family of language models. Specifically, we extract the learned embeddings these models use to represent tokens that correspond to numbers and ordinals, and subject these embeddings to Principal Component Analysis (PCA). PCA results reveal that ALBERT models of different sizes, trained and initialized separately, consistently learn to use the axes of greatest variation to represent the approximate ordering of various numerical concepts. Numerals and their textual counterparts are represented in separate clusters, but increase along the same direction in 2D space. Our findings illustrate that language models, trained purely to model text, can intuit basic mathematical concepts, opening avenues for NLP applications that intersect with quantitative reasoning.
%U https://aclanthology.org/2024.mathnlp-1.5
%P 35-40
Markdown (Informal)
[Exploring Internal Numeracy in Language Models: A Case Study on ALBERT](https://aclanthology.org/2024.mathnlp-1.5) (Wennberg & Henter, MathNLP-WS 2024)
ACL
Ulme Wennberg and Gustav Eje Henter. 2024. Exploring Internal Numeracy in Language Models: A Case Study on ALBERT. In Proceedings of the 2nd Workshop on Mathematical Natural Language Processing @ LREC-COLING 2024, pages 35–40, Torino, Italia. ELRA and ICCL.
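
Below is a minimal sketch of the kind of analysis the abstract describes: take the learned input embeddings of tokens for numerals and their textual counterparts, project them with PCA, and inspect the leading two components. It assumes the Hugging Face `transformers` implementation of ALBERT (`albert-base-v2`), `scikit-learn` for PCA, and `matplotlib` for plotting; the token lists and the single-wordpiece filter are illustrative choices, not the authors' released code.

```python
# Hypothetical sketch (not the authors' code): PCA of ALBERT's static token
# embeddings for numeric tokens, inspired by the analysis in the abstract.
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from transformers import AlbertModel, AlbertTokenizer

model_name = "albert-base-v2"  # other ALBERT sizes can be substituted
tokenizer = AlbertTokenizer.from_pretrained(model_name)
model = AlbertModel.from_pretrained(model_name)

# Tokens of interest: digits and their spelled-out counterparts.
numerals = [str(i) for i in range(10)]
words = ["zero", "one", "two", "three", "four",
         "five", "six", "seven", "eight", "nine"]

def single_token_ids(strings):
    """Keep only strings that map to exactly one ALBERT wordpiece."""
    kept, ids = [], []
    for s in strings:
        pieces = tokenizer.tokenize(s)
        if len(pieces) == 1:
            kept.append(s)
            ids.append(tokenizer.convert_tokens_to_ids(pieces)[0])
    return kept, ids

num_strs, num_ids = single_token_ids(numerals)
word_strs, word_ids = single_token_ids(words)

# Static (pre-contextual) embeddings from the model's embedding matrix.
emb = model.get_input_embeddings().weight.detach().numpy()
vectors = emb[num_ids + word_ids]

# Project onto the two axes of greatest variation.
coords = PCA(n_components=2).fit_transform(vectors)

for label, (x, y) in zip(num_strs + word_strs, coords):
    plt.scatter(x, y)
    plt.annotate(label, (x, y))
plt.xlabel("PC 1")
plt.ylabel("PC 2")
plt.show()
```

Repeating the same projection for the different ALBERT sizes is one way to check the abstract's claim that separately trained and initialized models place numerals and their textual counterparts in distinct clusters that nevertheless increase along the same direction in the 2D projection.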