@inproceedings{heinzerling-inui-2024-monotonic,
title = "Monotonic Representation of Numeric Attributes in Language Models",
author = "Heinzerling, Benjamin and
Inui, Kentaro",
editor = "Ku, Lun-Wei and
Martins, Andre and
Srikumar, Vivek",
booktitle = "Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.acl-short.18/",
doi = "10.18653/v1/2024.acl-short.18",
pages = "175--195",
abstract = "Language models (LMs) can express factual knowledge involving numeric properties such as Karl Popper was born in 1902. However, how this information is encoded in the model's internal representations is not understood well. Here, we introduce a method for finding and editing representations of numeric properties such as an entity's birth year. We find directions that encode numeric properties monotonically, in an interpretable fashion. When editing representations along these directions, LM output changes accordingly. For example, by patching activations along a {\textquotedblleft}birthyear{\textquotedblright} direction we can make the LM express an increasingly late birthyear. Property-encoding directions exist across several numeric properties in all models under consideration, suggesting the possibility that monotonic representation of numeric properties consistently emerges during LM pretraining. Code: https://github.com/bheinzerling/numeric-property-repr A long version of this short paper is available at: https://arxiv.org/abs/2403.10381"
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="heinzerling-inui-2024-monotonic">
<titleInfo>
<title>Monotonic Representation of Numeric Attributes in Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Benjamin</namePart>
<namePart type="family">Heinzerling</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kentaro</namePart>
<namePart type="family">Inui</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lun-Wei</namePart>
<namePart type="family">Ku</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andre</namePart>
<namePart type="family">Martins</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vivek</namePart>
<namePart type="family">Srikumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Language models (LMs) can express factual knowledge involving numeric properties such as Karl Popper was born in 1902. However, how this information is encoded in the model’s internal representations is not understood well. Here, we introduce a method for finding and editing representations of numeric properties such as an entity’s birth year. We find directions that encode numeric properties monotonically, in an interpretable fashion. When editing representations along these directions, LM output changes accordingly. For example, by patching activations along a “birthyear” direction we can make the LM express an increasingly late birthyear. Property-encoding directions exist across several numeric properties in all models under consideration, suggesting the possibility that monotonic representation of numeric properties consistently emerges during LM pretraining. Code: https://github.com/bheinzerling/numeric-property-repr A long version of this short paper is available at: https://arxiv.org/abs/2403.10381</abstract>
<identifier type="citekey">heinzerling-inui-2024-monotonic</identifier>
<identifier type="doi">10.18653/v1/2024.acl-short.18</identifier>
<location>
<url>https://aclanthology.org/2024.acl-short.18/</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>175</start>
<end>195</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Monotonic Representation of Numeric Attributes in Language Models
%A Heinzerling, Benjamin
%A Inui, Kentaro
%Y Ku, Lun-Wei
%Y Martins, Andre
%Y Srikumar, Vivek
%S Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F heinzerling-inui-2024-monotonic
%X Language models (LMs) can express factual knowledge involving numeric properties such as Karl Popper was born in 1902. However, how this information is encoded in the model’s internal representations is not understood well. Here, we introduce a method for finding and editing representations of numeric properties such as an entity’s birth year. We find directions that encode numeric properties monotonically, in an interpretable fashion. When editing representations along these directions, LM output changes accordingly. For example, by patching activations along a “birthyear” direction we can make the LM express an increasingly late birthyear. Property-encoding directions exist across several numeric properties in all models under consideration, suggesting the possibility that monotonic representation of numeric properties consistently emerges during LM pretraining. Code: https://github.com/bheinzerling/numeric-property-repr A long version of this short paper is available at: https://arxiv.org/abs/2403.10381
%R 10.18653/v1/2024.acl-short.18
%U https://aclanthology.org/2024.acl-short.18/
%U https://doi.org/10.18653/v1/2024.acl-short.18
%P 175-195
Markdown (Informal)
[Monotonic Representation of Numeric Attributes in Language Models](https://aclanthology.org/2024.acl-short.18/) (Heinzerling & Inui, ACL 2024)
ACL