@inproceedings{li-etal-2025-libra,
title = "Libra-Leaderboard: Towards Responsible {AI} through a Balanced Leaderboard of Safety and Capability",
author = "Li, Haonan and
Han, Xudong and
Zhai, Zenan and
Mu, Honglin and
Wang, Hao and
Zhang, Zhenxuan and
Geng, Yilin and
Lin, Shom and
Wang, Renxi and
Shelmanov, Artem and
Qi, Xiangyu and
Wang, Yuxia and
Hong, Donghai and
Yuan, Youliang and
Chen, Meng and
Tu, Haoqin and
Koto, Fajri and
Zeng, Cong and
Kuribayashi, Tatsuki and
Bhardwaj, Rishabh and
Zhao, Bingchen and
Duan, Yawen and
Liu, Yi and
Alghamdi, Emad A. and
Yang, Yaodong and
Dong, Yinpeng and
Poria, Soujanya and
Liu, Pengfei and
Liu, Zhengzhong and
Ren, Hector Xuguang and
Hovy, Eduard and
Gurevych, Iryna and
Nakov, Preslav and
Choudhury, Monojit and
Baldwin, Timothy",
editor = "Dziri, Nouha and
Ren, Sean (Xiang) and
Diao, Shizhe",
booktitle = "Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (System Demonstrations)",
month = apr,
year = "2025",
address = "Albuquerque, New Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.naacl-demo.23/",
doi = "10.18653/v1/2025.naacl-demo.23",
pages = "268--286",
ISBN = "979-8-89176-191-9",
abstract = "As large language models (LLMs) continue to evolve, leaderboards play a significant role in steering their development. Existing leaderboards often prioritize model capabilities while overlooking safety concerns, leaving a significant gap in responsible AI development. To address this gap, we introduce Libra-Leaderboard, a comprehensive framework designed to rank LLMs through a balanced evaluation of performance and safety. Combining a dynamic leaderboard with an interactive LLM arena, Libra-Leaderboard encourages the joint optimization of capability and safety. Unlike traditional approaches that average performance and safety metrics, Libra-Leaderboard uses a distance-to-optimal-score method to calculate the overall rankings. This approach incentivizes models to achieve a balance rather than excelling in one dimension at the expense of some other ones. In the first release, Libra-Leaderboard evaluates 26 mainstream LLMs from 14 leading organizations, identifying critical safety challenges even in state-of-the-art models."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="li-etal-2025-libra">
<titleInfo>
<title>Libra-Leaderboard: Towards Responsible AI through a Balanced Leaderboard of Safety and Capability</title>
</titleInfo>
<name type="personal">
<namePart type="given">Haonan</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xudong</namePart>
<namePart type="family">Han</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zenan</namePart>
<namePart type="family">Zhai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Honglin</namePart>
<namePart type="family">Mu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hao</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhenxuan</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yilin</namePart>
<namePart type="family">Geng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shom</namePart>
<namePart type="family">Lin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Renxi</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Artem</namePart>
<namePart type="family">Shelmanov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiangyu</namePart>
<namePart type="family">Qi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuxia</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Donghai</namePart>
<namePart type="family">Hong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Youliang</namePart>
<namePart type="family">Yuan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Meng</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Haoqin</namePart>
<namePart type="family">Tu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fajri</namePart>
<namePart type="family">Koto</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cong</namePart>
<namePart type="family">Zeng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tatsuki</namePart>
<namePart type="family">Kuribayashi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rishabh</namePart>
<namePart type="family">Bhardwaj</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bingchen</namePart>
<namePart type="family">Zhao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yawen</namePart>
<namePart type="family">Duan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yi</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Emad</namePart>
<namePart type="given">A</namePart>
<namePart type="family">Alghamdi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yaodong</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yinpeng</namePart>
<namePart type="family">Dong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Soujanya</namePart>
<namePart type="family">Poria</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pengfei</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhengzhong</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hector</namePart>
<namePart type="given">Xuguang</namePart>
<namePart type="family">Ren</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eduard</namePart>
<namePart type="family">Hovy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Iryna</namePart>
<namePart type="family">Gurevych</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Preslav</namePart>
<namePart type="family">Nakov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Monojit</namePart>
<namePart type="family">Choudhury</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Timothy</namePart>
<namePart type="family">Baldwin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (System Demonstrations)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nouha</namePart>
<namePart type="family">Dziri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sean</namePart>
<namePart type="given">(Xiang)</namePart>
<namePart type="family">Ren</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shizhe</namePart>
<namePart type="family">Diao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Albuquerque, New Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-191-9</identifier>
</relatedItem>
<abstract>As large language models (LLMs) continue to evolve, leaderboards play a significant role in steering their development. Existing leaderboards often prioritize model capabilities while overlooking safety concerns, leaving a significant gap in responsible AI development. To address this gap, we introduce Libra-Leaderboard, a comprehensive framework designed to rank LLMs through a balanced evaluation of performance and safety. Combining a dynamic leaderboard with an interactive LLM arena, Libra-Leaderboard encourages the joint optimization of capability and safety. Unlike traditional approaches that average performance and safety metrics, Libra-Leaderboard uses a distance-to-optimal-score method to calculate the overall rankings. This approach incentivizes models to achieve a balance rather than excelling in one dimension at the expense of some other ones. In the first release, Libra-Leaderboard evaluates 26 mainstream LLMs from 14 leading organizations, identifying critical safety challenges even in state-of-the-art models.</abstract>
<identifier type="citekey">li-etal-2025-libra</identifier>
<identifier type="doi">10.18653/v1/2025.naacl-demo.23</identifier>
<location>
<url>https://aclanthology.org/2025.naacl-demo.23/</url>
</location>
<part>
<date>2025-04</date>
<extent unit="page">
<start>268</start>
<end>286</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Libra-Leaderboard: Towards Responsible AI through a Balanced Leaderboard of Safety and Capability
%A Li, Haonan
%A Han, Xudong
%A Zhai, Zenan
%A Mu, Honglin
%A Wang, Hao
%A Zhang, Zhenxuan
%A Geng, Yilin
%A Lin, Shom
%A Wang, Renxi
%A Shelmanov, Artem
%A Qi, Xiangyu
%A Wang, Yuxia
%A Hong, Donghai
%A Yuan, Youliang
%A Chen, Meng
%A Tu, Haoqin
%A Koto, Fajri
%A Zeng, Cong
%A Kuribayashi, Tatsuki
%A Bhardwaj, Rishabh
%A Zhao, Bingchen
%A Duan, Yawen
%A Liu, Yi
%A Alghamdi, Emad A.
%A Yang, Yaodong
%A Dong, Yinpeng
%A Poria, Soujanya
%A Liu, Pengfei
%A Liu, Zhengzhong
%A Ren, Hector Xuguang
%A Hovy, Eduard
%A Gurevych, Iryna
%A Nakov, Preslav
%A Choudhury, Monojit
%A Baldwin, Timothy
%Y Dziri, Nouha
%Y Ren, Sean (Xiang)
%Y Diao, Shizhe
%S Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (System Demonstrations)
%D 2025
%8 April
%I Association for Computational Linguistics
%C Albuquerque, New Mexico
%@ 979-8-89176-191-9
%F li-etal-2025-libra
%X As large language models (LLMs) continue to evolve, leaderboards play a significant role in steering their development. Existing leaderboards often prioritize model capabilities while overlooking safety concerns, leaving a significant gap in responsible AI development. To address this gap, we introduce Libra-Leaderboard, a comprehensive framework designed to rank LLMs through a balanced evaluation of performance and safety. Combining a dynamic leaderboard with an interactive LLM arena, Libra-Leaderboard encourages the joint optimization of capability and safety. Unlike traditional approaches that average performance and safety metrics, Libra-Leaderboard uses a distance-to-optimal-score method to calculate the overall rankings. This approach incentivizes models to achieve a balance rather than excelling in one dimension at the expense of some other ones. In the first release, Libra-Leaderboard evaluates 26 mainstream LLMs from 14 leading organizations, identifying critical safety challenges even in state-of-the-art models.
%R 10.18653/v1/2025.naacl-demo.23
%U https://aclanthology.org/2025.naacl-demo.23/
%U https://doi.org/10.18653/v1/2025.naacl-demo.23
%P 268-286
Markdown (Informal)
[Libra-Leaderboard: Towards Responsible AI through a Balanced Leaderboard of Safety and Capability](https://aclanthology.org/2025.naacl-demo.23/) (Li et al., NAACL 2025)
ACL
- Haonan Li, Xudong Han, Zenan Zhai, Honglin Mu, Hao Wang, Zhenxuan Zhang, Yilin Geng, Shom Lin, Renxi Wang, Artem Shelmanov, Xiangyu Qi, Yuxia Wang, Donghai Hong, Youliang Yuan, Meng Chen, Haoqin Tu, Fajri Koto, Cong Zeng, Tatsuki Kuribayashi, Rishabh Bhardwaj, Bingchen Zhao, Yawen Duan, Yi Liu, Emad A. Alghamdi, Yaodong Yang, Yinpeng Dong, Soujanya Poria, Pengfei Liu, Zhengzhong Liu, Hector Xuguang Ren, Eduard Hovy, Iryna Gurevych, Preslav Nakov, Monojit Choudhury, and Timothy Baldwin. 2025. Libra-Leaderboard: Towards Responsible AI through a Balanced Leaderboard of Safety and Capability. In Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (System Demonstrations), pages 268–286, Albuquerque, New Mexico. Association for Computational Linguistics.