@inproceedings{qin-goldwasser-2026-iterative,
title = "Iterative Dual-Model Alignment for Story Evaluation",
author = "Qin, Bruce and
Goldwasser, Dan",
editor = "Liakata, Maria and
Moreira, Viviane P. and
Zhang, Jiajun and
Jurgens, David",
booktitle = "Proceedings of the 64th Annual Meeting of the {A}ssociation for {C}omputational {L}inguistics (Volume 1: Long Papers)",
month = jul,
year = "2026",
address = "San Diego, California, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.acl-long.648/",
pages = "14251--14264",
ISBN = "979-8-89176-390-6",
abstract = "Large language models (LLMs) can both evaluate and explain text quality; however, most existing evaluators operate as static classifiers and lack the ability to refine their reasoning through interaction. We propose an \textbf{Iterative Alpha{--}Beta Learning} framework that jointly trains two complementary 8B models: an Alpha ($\alpha$) classifier that assesses pairwise story engagement, and a Beta ($\beta$) generator that produces structured, rubric-guided comparative explanations. The two models co-evolve within a closed feedback loop: $\alpha$ provides probabilistic preference signals to guide $\beta${'}s Direct Preference Optimization (DPO), while $\beta${'}s improved explanations are reintegrated to retrain $\alpha$ via a KL-based contrastive objective. This dual optimization enables mutual learning: $\alpha$ gains interpretability and robustness from $\beta${'}s textual rationales, while $\beta$ acquires stronger alignment and discriminative precision from $\alpha${'}s confidence deltas. Experiments on human-annotated story-pair datasets HANNA show that the proposed system consistently outperforms strong single-model baselines in both accuracy and explanation quality across multiple iterative rounds."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="qin-goldwasser-2026-iterative">
<titleInfo>
<title>Iterative Dual-Model Alignment for Story Evaluation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bruce</namePart>
<namePart type="family">Qin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dan</namePart>
<namePart type="family">Goldwasser</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Liakata</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Viviane</namePart>
<namePart type="given">P</namePart>
<namePart type="family">Moreira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiajun</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Jurgens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-390-6</identifier>
</relatedItem>
<abstract>Large language models (LLMs) can both evaluate and explain text quality; however, most existing evaluators operate as static classifiers and lack the ability to refine their reasoning through interaction. We propose an Iterative Alpha–Beta Learning framework that jointly trains two complementary 8B models: an Alpha (α) classifier that assesses pairwise story engagement, and a Beta (β) generator that produces structured, rubric-guided comparative explanations. The two models co-evolve within a closed feedback loop: α provides probabilistic preference signals to guide β’s Direct Preference Optimization (DPO), while β’s improved explanations are reintegrated to retrain α via a KL-based contrastive objective. This dual optimization enables mutual learning: α gains interpretability and robustness from β’s textual rationales, while β acquires stronger alignment and discriminative precision from α’s confidence deltas. Experiments on human-annotated story-pair datasets HANNA show that the proposed system consistently outperforms strong single-model baselines in both accuracy and explanation quality across multiple iterative rounds.</abstract>
<identifier type="citekey">qin-goldwasser-2026-iterative</identifier>
<location>
<url>https://aclanthology.org/2026.acl-long.648/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>14251</start>
<end>14264</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Iterative Dual-Model Alignment for Story Evaluation
%A Qin, Bruce
%A Goldwasser, Dan
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-390-6
%F qin-goldwasser-2026-iterative
%X Large language models (LLMs) can both evaluate and explain text quality; however, most existing evaluators operate as static classifiers and lack the ability to refine their reasoning through interaction. We propose an Iterative Alpha–Beta Learning framework that jointly trains two complementary 8B models: an Alpha (α) classifier that assesses pairwise story engagement, and a Beta (β) generator that produces structured, rubric-guided comparative explanations. The two models co-evolve within a closed feedback loop: α provides probabilistic preference signals to guide β’s Direct Preference Optimization (DPO), while β’s improved explanations are reintegrated to retrain α via a KL-based contrastive objective. This dual optimization enables mutual learning: α gains interpretability and robustness from β’s textual rationales, while β acquires stronger alignment and discriminative precision from α’s confidence deltas. Experiments on human-annotated story-pair datasets HANNA show that the proposed system consistently outperforms strong single-model baselines in both accuracy and explanation quality across multiple iterative rounds.
%U https://aclanthology.org/2026.acl-long.648/
%P 14251-14264
Markdown (Informal)
[Iterative Dual-Model Alignment for Story Evaluation](https://aclanthology.org/2026.acl-long.648/) (Qin & Goldwasser, ACL 2026)
ACL
- Bruce Qin and Dan Goldwasser. 2026. Iterative Dual-Model Alignment for Story Evaluation. In Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 14251–14264, San Diego, California, United States. Association for Computational Linguistics.