@inproceedings{li-etal-2026-guidelines,
title = "Guidelines as Environments: A World Model Approach to Rule Following",
author = "Li, Haiqing and
Zhong, Wenliang and
Wu, Yinhao and
Ma, Hehuan and
Guo, Yuzhi and
Dang, Thao M. and
Huang, Junzhou",
editor = "Liakata, Maria and
Moreira, Viviane P. and
Zhang, Jiajun and
Jurgens, David",
booktitle = "Proceedings of the 64th Annual Meeting of the {A}ssociation for {C}omputational {L}inguistics (Volume 1: Long Papers)",
month = jul,
year = "2026",
address = "San Diego, California, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.acl-long.741/",
doi = "10.18653/v1/2026.acl-long.741",
pages = "16302--16318",
ISBN = "979-8-89176-390-6",
abstract = "Guideline-following is increasingly important in compliance, customer support, and other regulated workflows, where correctness is defined by explicit rule systems rather than heuristics. Learning to follow guidelines is challenging because guidelines are interdependent: rules can trigger, suppress, or conflict with one another, while locally plausible responses may violate global constraints. Most existing methods treat guidelines as static text and rely on implicit reasoning or deeper decoding, making rule interactions and satisfaction status hard to observe and control. A more feasible approach is to model guideline execution with an explicit state that tracks evolving rule evidence across steps. However, conventional world models are a poor fit: they typically assume privileged feedback or well-defined transition dynamics, assumptions that do not hold when reasoning occurs purely in language space under ambiguous, text-defined constraints. As a solution, we propose RGCWM, a Rule-Grounded Causal World Model that builds an explicit state space from the guideline text itself. RGCWM represents rule applicability and satisfaction as a continuously updated evidence state, externalizes inter-rule dependencies as a causal structure, and plans at inference time by counterfactually evaluating candidate responses under model-estimated state transitions. Experiments show that this shift from implicit text reasoning to state-based reasoning enables stable, controllable execution of complex interacting rules across diverse domains."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="li-etal-2026-guidelines">
<titleInfo>
<title>Guidelines as Environments: A World Model Approach to Rule Following</title>
</titleInfo>
<name type="personal">
<namePart type="given">Haiqing</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wenliang</namePart>
<namePart type="family">Zhong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yinhao</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hehuan</namePart>
<namePart type="family">Ma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuzhi</namePart>
<namePart type="family">Guo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thao</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Dang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junzhou</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Liakata</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Viviane</namePart>
<namePart type="given">P</namePart>
<namePart type="family">Moreira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiajun</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Jurgens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-390-6</identifier>
</relatedItem>
<abstract>Guideline-following is increasingly important in compliance, customer support, and other regulated workflows, where correctness is defined by explicit rule systems rather than heuristics. Learning to follow guidelines is challenging because guidelines are interdependent: rules can trigger, suppress, or conflict with one another, while locally plausible responses may violate global constraints. Most existing methods treat guidelines as static text and rely on implicit reasoning or deeper decoding, making rule interactions and satisfaction status hard to observe and control. A more feasible approach is to model guideline execution with an explicit state that tracks evolving rule evidence across steps. However, conventional world models are a poor fit: they typically assume privileged feedback or well-defined transition dynamics, assumptions that do not hold when reasoning occurs purely in language space under ambiguous, text-defined constraints. As a solution, we propose RGCWM, a Rule-Grounded Causal World Model that builds an explicit state space from the guideline text itself. RGCWM represents rule applicability and satisfaction as a continuously updated evidence state, externalizes inter-rule dependencies as a causal structure, and plans at inference time by counterfactually evaluating candidate responses under model-estimated state transitions. Experiments show that this shift from implicit text reasoning to state-based reasoning enables stable, controllable execution of complex interacting rules across diverse domains.</abstract>
<identifier type="citekey">li-etal-2026-guidelines</identifier>
<identifier type="doi">10.18653/v1/2026.acl-long.741</identifier>
<location>
<url>https://aclanthology.org/2026.acl-long.741/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>16302</start>
<end>16318</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Guidelines as Environments: A World Model Approach to Rule Following
%A Li, Haiqing
%A Zhong, Wenliang
%A Wu, Yinhao
%A Ma, Hehuan
%A Guo, Yuzhi
%A Dang, Thao M.
%A Huang, Junzhou
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-390-6
%F li-etal-2026-guidelines
%X Guideline-following is increasingly important in compliance, customer support, and other regulated workflows, where correctness is defined by explicit rule systems rather than heuristics. Learning to follow guidelines is challenging because guidelines are interdependent: rules can trigger, suppress, or conflict with one another, while locally plausible responses may violate global constraints. Most existing methods treat guidelines as static text and rely on implicit reasoning or deeper decoding, making rule interactions and satisfaction status hard to observe and control. A more feasible approach is to model guideline execution with an explicit state that tracks evolving rule evidence across steps. However, conventional world models are a poor fit: they typically assume privileged feedback or well-defined transition dynamics, assumptions that do not hold when reasoning occurs purely in language space under ambiguous, text-defined constraints. As a solution, we propose RGCWM, a Rule-Grounded Causal World Model that builds an explicit state space from the guideline text itself. RGCWM represents rule applicability and satisfaction as a continuously updated evidence state, externalizes inter-rule dependencies as a causal structure, and plans at inference time by counterfactually evaluating candidate responses under model-estimated state transitions. Experiments show that this shift from implicit text reasoning to state-based reasoning enables stable, controllable execution of complex interacting rules across diverse domains.
%R 10.18653/v1/2026.acl-long.741
%U https://aclanthology.org/2026.acl-long.741/
%U https://doi.org/10.18653/v1/2026.acl-long.741
%P 16302-16318
Markdown (Informal)
[Guidelines as Environments: A World Model Approach to Rule Following](https://aclanthology.org/2026.acl-long.741/) (Li et al., ACL 2026)
ACL
- Haiqing Li, Wenliang Zhong, Yinhao Wu, Hehuan Ma, Yuzhi Guo, Thao M. Dang, and Junzhou Huang. 2026. Guidelines as Environments: A World Model Approach to Rule Following. In Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 16302–16318, San Diego, California, United States. Association for Computational Linguistics.