% BibTeX record for the CAL-Log paper (conference paper; see the url field for the ACL Anthology page).
% Field values use LaTeX escapes for non-ASCII symbols so the entry stays 7-bit clean.
@inproceedings{kariyakaranage-athuraliya-2026-cal,
    title     = {{CAL}-Log: Cost-Aware Active Learning with Logarithmic Cognitive Effort Modeling and Online Adaptation to Human Annotation Behavior},
    author    = {Kariyakaranage, Vihanga Supasan and Athuraliya, Banuka},
    editor    = {T.Y.S.S., Santosh and Rodriguez, Juan Diego and de Gibert, Ona},
    booktitle = {Proceedings of the 64th Annual Meeting of the {A}ssociation for {C}omputational {L}inguistics ({ACL} 2026)},
    month     = jul,
    year      = {2026},
    address   = {San Diego, California, United States},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2026.acl-srw.48/},
    pages     = {537--553},
    isbn      = {979-8-89176-393-7},
    abstract  = {Active learning (AL) reduces labeled data requirements in NLP, yet most methods optimize label efficiency while ignoring annotation cost. Standard uncertainty sampling assumes uniform effort, leading to suboptimal resource allocation when documents vary in length. Supasan and Athuraliya (2026) introduced CAL-Log, a cost-aware AL variant using logarithmic cost modeling C(x)={\ensuremath{\alpha}}+{\ensuremath{\beta}} log(1+L(x)), where C(x) is the predicted annotation time for document x and L(x) is its token length, grounded in information foraging theory (Pirolli and Card, 1999) and psycholinguistic studies of human skimming (Rayner, 1998). This paper presents CAL-Log in full, extending that preliminary framework with two new contributions: temperature-scaled calibrated entropy and online per-annotator cost adaptation, which together resolve the cold-start calibration bottleneck identified in the prior work. Experiments on ten text classification benchmarks demonstrate a 3.3{\texttimes} speedup over BADGE (Batch Active learning by Diverse Gradient Embeddings; Ash et al., 2020) and 3.9{\texttimes} over Entropy sampling to reach F1=0.80, with large effect sizes (Cohen{'}s d{\ensuremath{>}}0.8). A live annotation deployment with preliminary user evaluation (N=7) confirms that the online cost model produces reading-speed classifications consistent with annotator self-reports, and that a transparency interface successfully communicates the scoring rationale to non-expert users.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kariyakaranage-athuraliya-2026-cal">
<titleInfo>
<title>CAL-Log: Cost-Aware Active Learning with Logarithmic Cognitive Effort Modeling and Online Adaptation to Human Annotation Behavior</title>
</titleInfo>
<name type="personal">
<namePart type="given">Vihanga</namePart>
<namePart type="given">Supasan</namePart>
<namePart type="family">Kariyakaranage</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Banuka</namePart>
<namePart type="family">Athuraliya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (ACL 2026)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Santosh</namePart>
<namePart type="family">T.Y.S.S.</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juan</namePart>
<namePart type="given">Diego</namePart>
<namePart type="family">Rodriguez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ona</namePart>
<namePart type="family">de Gibert</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-393-7</identifier>
</relatedItem>
<abstract>Active learning (AL) reduces labeled data requirements in NLP, yet most methods optimize label efficiency while ignoring annotation cost. Standard uncertainty sampling assumes uniform effort, leading to suboptimal resource allocation when documents vary in length. Supasan and Athuraliya (2026) introduced CAL-Log, a cost-aware AL variant using logarithmic cost modeling C(x)=α+β log(1+L(x)), where C(x) is the predicted annotation time for document x and L(x) is its token length, grounded in information foraging theory (Pirolli and Card, 1999) and psycholinguistic studies of human skimming (Rayner, 1998). This paper presents CAL-Log in full, extending that preliminary framework with two new contributions: temperature-scaled calibrated entropy and online per-annotator cost adaptation, which together resolve the cold-start calibration bottleneck identified in the prior work. Experiments on ten text classification benchmarks demonstrate a 3.3× speedup over BADGE (Batch Active learning by Diverse Gradient Embeddings; Ash et al., 2020) and 3.9× over Entropy sampling to reach F1=0.80, with large effect sizes (Cohen’s d&gt;0.8). A live annotation deployment with preliminary user evaluation (N=7) confirms that the online cost model produces reading-speed classifications consistent with annotator self-reports, and that a transparency interface successfully communicates the scoring rationale to non-expert users.</abstract>
<identifier type="citekey">kariyakaranage-athuraliya-2026-cal</identifier>
<location>
<url>https://aclanthology.org/2026.acl-srw.48/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>537</start>
<end>553</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T CAL-Log: Cost-Aware Active Learning with Logarithmic Cognitive Effort Modeling and Online Adaptation to Human Annotation Behavior
%A Kariyakaranage, Vihanga Supasan
%A Athuraliya, Banuka
%Y T.Y.S.S., Santosh
%Y Rodriguez, Juan Diego
%Y de Gibert, Ona
%S Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (ACL 2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-393-7
%F kariyakaranage-athuraliya-2026-cal
%X Active learning (AL) reduces labeled data requirements in NLP, yet most methods optimize label efficiency while ignoring annotation cost. Standard uncertainty sampling assumes uniform effort, leading to suboptimal resource allocation when documents vary in length. Supasan and Athuraliya (2026) introduced CAL-Log, a cost-aware AL variant using logarithmic cost modeling C(x)=α+β log(1+L(x)), where C(x) is the predicted annotation time for document x and L(x) is its token length, grounded in information foraging theory (Pirolli and Card, 1999) and psycholinguistic studies of human skimming (Rayner, 1998). This paper presents CAL-Log in full, extending that preliminary framework with two new contributions: temperature-scaled calibrated entropy and online per-annotator cost adaptation, which together resolve the cold-start calibration bottleneck identified in the prior work. Experiments on ten text classification benchmarks demonstrate a 3.3× speedup over BADGE (Batch Active learning by Diverse Gradient Embeddings; Ash et al., 2020) and 3.9× over Entropy sampling to reach F1=0.80, with large effect sizes (Cohen’s d>0.8). A live annotation deployment with preliminary user evaluation (N=7) confirms that the online cost model produces reading-speed classifications consistent with annotator self-reports, and that a transparency interface successfully communicates the scoring rationale to non-expert users.
%U https://aclanthology.org/2026.acl-srw.48/
%P 537-553
Markdown (Informal)
[CAL-Log: Cost-Aware Active Learning with Logarithmic Cognitive Effort Modeling and Online Adaptation to Human Annotation Behavior](https://aclanthology.org/2026.acl-srw.48/) (Kariyakaranage & Athuraliya, ACL 2026)
ACL