@comment{Conference-paper record for LITMUS++ (ACL Anthology ID 2025.ijcnlp-demo.6).
  Field values are brace-delimited so nested braces and quotation marks stay safe;
  {LITMUS} is brace-protected so sentence-casing styles keep the acronym uppercase.}
@inproceedings{mittal-etal-2025-litmus,
  title     = {{LITMUS}++ : An Agentic System for Predictive Analysis of Low-Resource Languages Across Tasks and Models},
  author    = {Mittal, Avni and
               Kumar, Shanu and
               Dandapat, Sandipan and
               Choudhury, Monojit},
  editor    = {Liu, Xuebo and
               Purwarianti, Ayu},
  booktitle = {Proceedings of The 14th International Joint Conference on Natural Language Processing and The 4th Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics: System Demonstrations},
  month     = dec,
  year      = {2025},
  address   = {Mumbai, India},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.ijcnlp-demo.6/},
  pages     = {47--54},
  isbn      = {979-8-89176-301-2},
  abstract  = {We present LITMUS++, an agentic system for predicting language-model performance for queries of the form ``How will a Model perform on a Task in a Language?'', a persistent challenge in multilingual and low-resource settings, settings where benchmarks are incomplete or unavailable. Unlike static evaluation suites or opaque LLM-as-judge pipelines, LITMUS++ implements an agentic, auditable workflow: a Directed Acyclic Graph of specialized Thought Agents that generate hypotheses, retrieve multilingual evidence, select predictive features, and train lightweight regressors with calibrated uncertainty. The system supports interactive querying through a chat-style interface, enabling users to inspect reasoning traces and cited evidence. Experiments across six tasks and five multilingual scenarios show that LITMUS++ delivers accurate and interpretable performance predictions, including in low-resource and unseen conditions. Code is available at https://github.com/AvniMittal13/litmus{\_}plus{\_}plus.}
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record; its ID matches the citekey identifier further down. -->
  <mods ID="mittal-etal-2025-litmus">
    <titleInfo>
      <title>LITMUS++ : An Agentic System for Predictive Analysis of Low-Resource Languages Across Tasks and Models</title>
    </titleInfo>
    <!-- Authors of the paper, one <name> element each. -->
    <name type="personal">
      <namePart type="given">Avni</namePart>
      <namePart type="family">Mittal</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Shanu</namePart>
      <namePart type="family">Kumar</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sandipan</namePart>
      <namePart type="family">Dandapat</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Monojit</namePart>
      <namePart type="family">Choudhury</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher, place and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of The 14th International Joint Conference on Natural Language Processing and The 4th Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics: System Demonstrations</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Xuebo</namePart>
        <namePart type="family">Liu</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ayu</namePart>
        <namePart type="family">Purwarianti</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Mumbai, India</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-301-2</identifier>
    </relatedItem>
    <abstract>We present LITMUS++, an agentic system for predicting language-model performance for queries of the form “How will a Model perform on a Task in a Language?”, a persistent challenge in multilingual and low-resource settings, settings where benchmarks are incomplete or unavailable. Unlike static evaluation suites or opaque LLM-as-judge pipelines, LITMUS++ implements an agentic, auditable workflow: a Directed Acyclic Graph of specialized Thought Agents that generate hypotheses, retrieve multilingual evidence, select predictive features, and train lightweight regressors with calibrated uncertainty. The system supports interactive querying through a chat-style interface, enabling users to inspect reasoning traces and cited evidence. Experiments across six tasks and five multilingual scenarios show that LITMUS++ delivers accurate and interpretable performance predictions, including in low-resource and unseen conditions. Code is available at https://github.com/AvniMittal13/litmus_plus_plus.</abstract>
    <identifier type="citekey">mittal-etal-2025-litmus</identifier>
    <location>
      <url>https://aclanthology.org/2025.ijcnlp-demo.6/</url>
    </location>
    <!-- Position of the paper within the host volume: issue date and page extent. -->
    <part>
      <date>2025-12</date>
      <extent unit="page">
        <start>47</start>
        <end>54</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T LITMUS++ : An Agentic System for Predictive Analysis of Low-Resource Languages Across Tasks and Models
%A Mittal, Avni
%A Kumar, Shanu
%A Dandapat, Sandipan
%A Choudhury, Monojit
%Y Liu, Xuebo
%Y Purwarianti, Ayu
%S Proceedings of The 14th International Joint Conference on Natural Language Processing and The 4th Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics: System Demonstrations
%D 2025
%8 December
%I Association for Computational Linguistics
%C Mumbai, India
%@ 979-8-89176-301-2
%F mittal-etal-2025-litmus
%X We present LITMUS++, an agentic system for predicting language-model performance for queries of the form “How will a Model perform on a Task in a Language?”, a persistent challenge in multilingual and low-resource settings, settings where benchmarks are incomplete or unavailable. Unlike static evaluation suites or opaque LLM-as-judge pipelines, LITMUS++ implements an agentic, auditable workflow: a Directed Acyclic Graph of specialized Thought Agents that generate hypotheses, retrieve multilingual evidence, select predictive features, and train lightweight regressors with calibrated uncertainty. The system supports interactive querying through a chat-style interface, enabling users to inspect reasoning traces and cited evidence. Experiments across six tasks and five multilingual scenarios show that LITMUS++ delivers accurate and interpretable performance predictions, including in low-resource and unseen conditions. Code is available at https://github.com/AvniMittal13/litmus_plus_plus.
%U https://aclanthology.org/2025.ijcnlp-demo.6/
%P 47-54
Markdown (Informal)
[LITMUS++ : An Agentic System for Predictive Analysis of Low-Resource Languages Across Tasks and Models](https://aclanthology.org/2025.ijcnlp-demo.6/) (Mittal et al., IJCNLP 2025)
ACL