@inproceedings{cao-etal-2026-scores,
title = "From Scores to Preferences: Redefining Evaluation Paradigm for Speech Quality Reward Modeling",
author = "Cao, Yifei and
Jiang, Changhao and
Zhuang, Jiabao and
Sun, Jiajun and
Zhang, Ming and
Xi, Zhiheng and
Li, Hui and
Dou, Shihan and
Wang, Yuran and
Zhang, Yunke and
Ji, Tao and
Gui, Tao and
Zhang, Qi and
Huang, Xuanjing",
editor = "Liakata, Maria and
Moreira, Viviane P. and
Zhang, Jiajun and
Jurgens, David",
booktitle = "Findings of the {A}ssociation for {C}omputational {L}inguistics: {ACL} 2026",
month = jul,
year = "2026",
address = "San Diego, California, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.findings-acl.1638/",
pages = "32731--32749",
ISBN = "979-8-89176-395-1",
abstract = "Speech quality assessment (SQA) is typically formulated as a score regression task based on subjective ratings, such as the Mean Opinion Score (MOS), which inherently suffer from inconsistent standards and limit cross-dataset training and evaluation. To address these limitations, we reformulate SQA as a preference-based comparison paradigm and construct MOS-Pref, a large-scale MOS-derived preference dataset. Building on MOS-Pref, we systematically implement and evaluate three reward modeling paradigms: scalar, semi-scalar, and generative reward models, alongside existing SQA approaches. Our experiments reveal three key findings: (1) scalar models achieve the strongest overall performance, consistently exceeding 74{\%} accuracy; (2) score regression-based approaches generally underperform preference-based methods in both overall performance and generalization; and (3) all reward models struggle on pairs with very small MOS gap. Motivated by these observations, we propose a MOS-aware GRM design that incorporates MOS gap into the reward function during reinforcement learning. Experimental results show that the MOS-aware GRM significantly improves fine-grained speech quality discrimination. We hope this work fosters more rigorous and scalable research in SQA."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="cao-etal-2026-scores">
<titleInfo>
<title>From Scores to Preferences: Redefining Evaluation Paradigm for Speech Quality Reward Modeling</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yifei</namePart>
<namePart type="family">Cao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Changhao</namePart>
<namePart type="family">Jiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiabao</namePart>
<namePart type="family">Zhuang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiajun</namePart>
<namePart type="family">Sun</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ming</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhiheng</namePart>
<namePart type="family">Xi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hui</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shihan</namePart>
<namePart type="family">Dou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuran</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yunke</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tao</namePart>
<namePart type="family">Ji</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tao</namePart>
<namePart type="family">Gui</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Qi</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xuanjing</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2026</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Liakata</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Viviane</namePart>
<namePart type="given">P</namePart>
<namePart type="family">Moreira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiajun</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Jurgens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-395-1</identifier>
</relatedItem>
<abstract>Speech quality assessment (SQA) is typically formulated as a score regression task based on subjective ratings, such as the Mean Opinion Score (MOS), which inherently suffer from inconsistent standards and limit cross-dataset training and evaluation. To address these limitations, we reformulate SQA as a preference-based comparison paradigm and construct MOS-Pref, a large-scale MOS-derived preference dataset. Building on MOS-Pref, we systematically implement and evaluate three reward modeling paradigms: scalar, semi-scalar, and generative reward models, alongside existing SQA approaches. Our experiments reveal three key findings: (1) scalar models achieve the strongest overall performance, consistently exceeding 74% accuracy; (2) score regression-based approaches generally underperform preference-based methods in both overall performance and generalization; and (3) all reward models struggle on pairs with very small MOS gap. Motivated by these observations, we propose a MOS-aware GRM design that incorporates MOS gap into the reward function during reinforcement learning. Experimental results show that the MOS-aware GRM significantly improves fine-grained speech quality discrimination. We hope this work fosters more rigorous and scalable research in SQA.</abstract>
<identifier type="citekey">cao-etal-2026-scores</identifier>
<location>
<url>https://aclanthology.org/2026.findings-acl.1638/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>32731</start>
<end>32749</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T From Scores to Preferences: Redefining Evaluation Paradigm for Speech Quality Reward Modeling
%A Cao, Yifei
%A Jiang, Changhao
%A Zhuang, Jiabao
%A Sun, Jiajun
%A Zhang, Ming
%A Xi, Zhiheng
%A Li, Hui
%A Dou, Shihan
%A Wang, Yuran
%A Zhang, Yunke
%A Ji, Tao
%A Gui, Tao
%A Zhang, Qi
%A Huang, Xuanjing
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Findings of the Association for Computational Linguistics: ACL 2026
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-395-1
%F cao-etal-2026-scores
%X Speech quality assessment (SQA) is typically formulated as a score regression task based on subjective ratings, such as the Mean Opinion Score (MOS), which inherently suffer from inconsistent standards and limit cross-dataset training and evaluation. To address these limitations, we reformulate SQA as a preference-based comparison paradigm and construct MOS-Pref, a large-scale MOS-derived preference dataset. Building on MOS-Pref, we systematically implement and evaluate three reward modeling paradigms: scalar, semi-scalar, and generative reward models, alongside existing SQA approaches. Our experiments reveal three key findings: (1) scalar models achieve the strongest overall performance, consistently exceeding 74% accuracy; (2) score regression-based approaches generally underperform preference-based methods in both overall performance and generalization; and (3) all reward models struggle on pairs with very small MOS gap. Motivated by these observations, we propose a MOS-aware GRM design that incorporates MOS gap into the reward function during reinforcement learning. Experimental results show that the MOS-aware GRM significantly improves fine-grained speech quality discrimination. We hope this work fosters more rigorous and scalable research in SQA.
%U https://aclanthology.org/2026.findings-acl.1638/
%P 32731-32749
Markdown (Informal)
[From Scores to Preferences: Redefining Evaluation Paradigm for Speech Quality Reward Modeling](https://aclanthology.org/2026.findings-acl.1638/) (Cao et al., Findings 2026)
ACL
- Yifei Cao, Changhao Jiang, Jiabao Zhuang, Jiajun Sun, Ming Zhang, Zhiheng Xi, Hui Li, Shihan Dou, Yuran Wang, Yunke Zhang, Tao Ji, Tao Gui, Qi Zhang, and Xuanjing Huang. 2026. From Scores to Preferences: Redefining Evaluation Paradigm for Speech Quality Reward Modeling. In Findings of the Association for Computational Linguistics: ACL 2026, pages 32731–32749, San Diego, California, United States. Association for Computational Linguistics.