@inproceedings{huang-etal-2026-modeling,
title = "Modeling {LLM} Agent Reviewer Dynamics in Elo-Ranked Review System",
author = "Huang, Hsiang-Wei and
Lu, Junbin and
Chen, Kuang-Ming and
Shangguan, Jianxu and
Hwang, Jenq-Neng",
editor = "Gupta, Vivek and
Ding, Kaize and
Kokel, Harsha and
Zhao, Yue and
Agarwal, Amit and
Wang, Yu and
Glass, Michael and
Zhang, Yu and
Srinivas, Kavitha and
Chen, Xiusi and
Hassanzadeh, Oktie and
Zhu, Qi and
Chang, Shuaichen and
Luo, Yuan",
booktitle = "Proceedings of the First Workshop on Structured Understanding, Retrieval, and Generation in the {LLM} Era ({SURG}e{LLM} 2026)",
month = jul,
year = "2026",
address = "San Diego, California, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.surgellm-1.11/",
pages = "182--189",
ISBN = "979-8-89176-406-4",
abstract = "In this work, we explore the Large Language Model (LLM) agent reviewer dynamics in an Elo-ranked review system using real-world conference paper submissions. Multiple LLM agent reviewers with different personas engage in multi-round review interactions moderated by an Area Chair. We compare a baseline setting with conditions that incorporate Elo ratings and reviewer memory. Our simulation results showcase several interesting findings, including how incorporating Elo improves Area Chair decision accuracy, as well as reviewers' adaptive review strategies that exploit our Elo system without improving review effort. These findings show how the Elo system affects peer review and offer insights for improving AI conference evaluation. Our code is available at https://github.com/hsiangwei0903/EloReview."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="huang-etal-2026-modeling">
<titleInfo>
<title>Modeling LLM Agent Reviewer Dynamics in Elo-Ranked Review System</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hsiang-Wei</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junbin</namePart>
<namePart type="family">Lu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kuang-Ming</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jianxu</namePart>
<namePart type="family">Shangguan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jenq-Neng</namePart>
<namePart type="family">Hwang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Structured Understanding, Retrieval, and Generation in the LLM Era (SURGeLLM 2026)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Vivek</namePart>
<namePart type="family">Gupta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kaize</namePart>
<namePart type="family">Ding</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Harsha</namePart>
<namePart type="family">Kokel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yue</namePart>
<namePart type="family">Zhao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amit</namePart>
<namePart type="family">Agarwal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yu</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michael</namePart>
<namePart type="family">Glass</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yu</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kavitha</namePart>
<namePart type="family">Srinivas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiusi</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Oktie</namePart>
<namePart type="family">Hassanzadeh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Qi</namePart>
<namePart type="family">Zhu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shuaichen</namePart>
<namePart type="family">Chang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuan</namePart>
<namePart type="family">Luo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-406-4</identifier>
</relatedItem>
<abstract>In this work, we explore the Large Language Model (LLM) agent reviewer dynamics in an Elo-ranked review system using real-world conference paper submissions. Multiple LLM agent reviewers with different personas engage in multi-round review interactions moderated by an Area Chair. We compare a baseline setting with conditions that incorporate Elo ratings and reviewer memory. Our simulation results showcase several interesting findings, including how incorporating Elo improves Area Chair decision accuracy, as well as reviewers’ adaptive review strategies that exploit our Elo system without improving review effort. These findings show how the Elo system affects peer review and offer insights for improving AI conference evaluation. Our code is available at https://github.com/hsiangwei0903/EloReview.</abstract>
<identifier type="citekey">huang-etal-2026-modeling</identifier>
<location>
<url>https://aclanthology.org/2026.surgellm-1.11/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>182</start>
<end>189</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Modeling LLM Agent Reviewer Dynamics in Elo-Ranked Review System
%A Huang, Hsiang-Wei
%A Lu, Junbin
%A Chen, Kuang-Ming
%A Shangguan, Jianxu
%A Hwang, Jenq-Neng
%Y Gupta, Vivek
%Y Ding, Kaize
%Y Kokel, Harsha
%Y Zhao, Yue
%Y Agarwal, Amit
%Y Wang, Yu
%Y Glass, Michael
%Y Zhang, Yu
%Y Srinivas, Kavitha
%Y Chen, Xiusi
%Y Hassanzadeh, Oktie
%Y Zhu, Qi
%Y Chang, Shuaichen
%Y Luo, Yuan
%S Proceedings of the First Workshop on Structured Understanding, Retrieval, and Generation in the LLM Era (SURGeLLM 2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-406-4
%F huang-etal-2026-modeling
%X In this work, we explore the Large Language Model (LLM) agent reviewer dynamics in an Elo-ranked review system using real-world conference paper submissions. Multiple LLM agent reviewers with different personas engage in multi-round review interactions moderated by an Area Chair. We compare a baseline setting with conditions that incorporate Elo ratings and reviewer memory. Our simulation results showcase several interesting findings, including how incorporating Elo improves Area Chair decision accuracy, as well as reviewers’ adaptive review strategies that exploit our Elo system without improving review effort. These findings show how the Elo system affects peer review and offer insights for improving AI conference evaluation. Our code is available at https://github.com/hsiangwei0903/EloReview.
%U https://aclanthology.org/2026.surgellm-1.11/
%P 182-189
Markdown (Informal)
[Modeling LLM Agent Reviewer Dynamics in Elo-Ranked Review System](https://aclanthology.org/2026.surgellm-1.11/) (Huang et al., SURGeLLM 2026)
ACL
- Hsiang-Wei Huang, Junbin Lu, Kuang-Ming Chen, Jianxu Shangguan, and Jenq-Neng Hwang. 2026. Modeling LLM Agent Reviewer Dynamics in Elo-Ranked Review System. In Proceedings of the First Workshop on Structured Understanding, Retrieval, and Generation in the LLM Era (SURGeLLM 2026), pages 182–189, San Diego, California, United States. Association for Computational Linguistics.