% BibTeX record for ACL Anthology paper 2026.bigpicture-main.10 (MODS XML and EndNote records follow).
@inproceedings{subramani-2026-harnessing,
  title     = {Harnessing the Latent Space: From Steering Vectors to Model Calibrators for Control and Trust},
  author    = {Subramani, Nishant},
  editor    = {Elazar, Yanai and
               Ettinger, Allyson and
               Kassner, Nora and
               Ruder, Sebastian},
  booktitle = {Proceedings of The Big Picture v2: Crafting a Research Narrative},
  month     = jul,
  year      = {2026},
  address   = {San Diego, CA, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.bigpicture-main.10/},
  doi       = {10.18653/v1/2026.bigpicture-main.10},
  pages     = {119--130},
  isbn      = {979-8-89176-416-3},
  abstract  = {Language models have changed from unreliable text generators to highly-capable large models with trillions of parameters. Capability increases come hand-in-hand with increases in scale, making understanding the internal representations of models more challenging. Since millions of users increasingly rely on language models to interact with external tools or make decisions in medium or high-stakes scenarios, we need to establish control over model behavior and know when to trust model outputs. In this paper, we discuss our contributions on harnessing the latent spaces by proposing steering vectors for control and developing latent space-based model calibrators for trust. Together, our contributions help demystify the latent spaces of language models and offer new insights into how to harness model internals to build more trustworthy language technology.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="subramani-2026-harnessing">
<titleInfo>
<title>Harnessing the Latent Space: From Steering Vectors to Model Calibrators for Control and Trust</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nishant</namePart>
<namePart type="family">Subramani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of The Big Picture v2: Crafting a Research Narrative</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yanai</namePart>
<namePart type="family">Elazar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Allyson</namePart>
<namePart type="family">Ettinger</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nora</namePart>
<namePart type="family">Kassner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastian</namePart>
<namePart type="family">Ruder</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, CA, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-416-3</identifier>
</relatedItem>
<abstract>Language models have changed from unreliable text generators to highly-capable large models with trillions of parameters. Capability increases come hand-in-hand with increases in scale, making understanding the internal representations of models more challenging. Since millions of users increasingly rely on language models to interact with external tools or make decisions in medium or high-stakes scenarios, we need to establish control over model behavior and know when to trust model outputs. In this paper, we discuss our contributions on harnessing the latent spaces by proposing steering vectors for control and developing latent space-based model calibrators for trust. Together, our contributions help demystify the latent spaces of language models and offer new insights into how to harness model internals to build more trustworthy language technology.</abstract>
<identifier type="citekey">subramani-2026-harnessing</identifier>
<identifier type="doi">10.18653/v1/2026.bigpicture-main.10</identifier>
<location>
<url>https://aclanthology.org/2026.bigpicture-main.10/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>119</start>
<end>130</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Harnessing the Latent Space: From Steering Vectors to Model Calibrators for Control and Trust
%A Subramani, Nishant
%Y Elazar, Yanai
%Y Ettinger, Allyson
%Y Kassner, Nora
%Y Ruder, Sebastian
%S Proceedings of The Big Picture v2: Crafting a Research Narrative
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, CA, USA
%@ 979-8-89176-416-3
%F subramani-2026-harnessing
%X Language models have changed from unreliable text generators to highly-capable large models with trillions of parameters. Capability increases come hand-in-hand with increases in scale, making understanding the internal representations of models more challenging. Since millions of users increasingly rely on language models to interact with external tools or make decisions in medium or high-stakes scenarios, we need to establish control over model behavior and know when to trust model outputs. In this paper, we discuss our contributions on harnessing the latent spaces by proposing steering vectors for control and developing latent space-based model calibrators for trust. Together, our contributions help demystify the latent spaces of language models and offer new insights into how to harness model internals to build more trustworthy language technology.
%R 10.18653/v1/2026.bigpicture-main.10
%U https://aclanthology.org/2026.bigpicture-main.10/
%U https://doi.org/10.18653/v1/2026.bigpicture-main.10
%P 119-130
Markdown (Informal)
[Harnessing the Latent Space: From Steering Vectors to Model Calibrators for Control and Trust](https://aclanthology.org/2026.bigpicture-main.10/) (Subramani, BigPicture 2026)
ACL