% ACL Anthology record for the NL => Schedule paper (ACL 2026, long papers).
% The benchmark name in the title is braced as a single group so that styles
% which sentence-case titles cannot lowercase "NL", "Schedule", or the
% \Rightarrow macro (which would otherwise become \rightarrow).
@inproceedings{liao-etal-2026-nl,
  title     = {{NL $\Rightarrow$ Schedule}: Evaluate Multitask Scheduling Capability of Large Language Models},
  author    = {Liao, Wenrui and
               Du, Weihong and
               Li, Yi and
               Liang, Hongru and
               Lei, Wenqiang},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} (Volume 1: Long Papers)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.acl-long.1648/},
  pages     = {35620--35640},
  isbn      = {979-8-89176-390-6},
  abstract  = {Automated schedule generation for multitask from natural language descriptions has huge potential in modern industry. While classic methods bypass language complexities by using pre-formatted matrices, recent LLM+solver approaches introduce new fragilities by relying on solver-specific code generation. This raises critical questions: Can large language models (LLMs) solve this NL $\Rightarrow$ Schedule task end-to-end well (RQ1)? If the answer is ``no'', where do they fall short (RQ2)? And how can their capabilities be enhanced (RQ3)? To answer these questions, we introduce NL $\Rightarrow$ Schedule, the first benchmark for this task, equipped with a dataset of 240 description-schedule pairs constructed from real-world materials and a rigorous evaluation suite. Our evaluation of nine state-of-the-art LLMs reveals the limitations of different LLMs in procedure grounding and the strengths of advanced LLMs in global planning via local analysis. To address these shortcomings, we propose Mans, a novel multi-agent framework. Extensive experiments show that Mans achieves more robust performance comparable to six state-of-the-art LLM+solver methods. We hope NL $\Rightarrow$ Schedule and Mans will serve as a solid foundation for automatic scheduling.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS record for the conference paper with citekey liao-etal-2026-nl.
       The implication arrow in the title and abstract is written as the
       Unicode character U+21D2, because MODS text is not processed by TeX
       and a bare \Rightarrow macro would be shown literally. -->
  <mods ID="liao-etal-2026-nl">
    <titleInfo>
      <title>NL ⇒ Schedule: Evaluate Multitask Scheduling Capability of Large Language Models</title>
    </titleInfo>
    <!-- Authors, in publication order -->
    <name type="personal">
      <namePart type="given">Wenrui</namePart>
      <namePart type="family">Liao</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Weihong</namePart>
      <namePart type="family">Du</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yi</namePart>
      <namePart type="family">Li</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Hongru</namePart>
      <namePart type="family">Liang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Wenqiang</namePart>
      <namePart type="family">Lei</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors, publisher and ISBN -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Maria</namePart>
        <namePart type="family">Liakata</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Viviane</namePart>
        <namePart type="given">P</namePart>
        <namePart type="family">Moreira</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jiajun</namePart>
        <namePart type="family">Zhang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">David</namePart>
        <namePart type="family">Jurgens</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, California, United States</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-390-6</identifier>
    </relatedItem>
    <abstract>Automated schedule generation for multitask from natural language descriptions has huge potential in modern industry. While classic methods bypass language complexities by using pre-formatted matrices, recent LLM+solver approaches introduce new fragilities by relying on solver-specific code generation. This raises critical questions: Can large language models (LLMs) solve this NL ⇒ Schedule task end-to-end well (RQ1)? If the answer is “no”, where do they fall short (RQ2)? And how can their capabilities be enhanced (RQ3)? To answer these questions, we introduce NL ⇒ Schedule, the first benchmark for this task, equipped with a dataset of 240 description-schedule pairs constructed from real-world materials and a rigorous evaluation suite. Our evaluation of nine state-of-the-art LLMs reveals the limitations of different LLMs in procedure grounding and the strengths of advanced LLMs in global planning via local analysis. To address these shortcomings, we propose Mans, a novel multi-agent framework. Extensive experiments show that Mans achieves more robust performance comparable to six state-of-the-art LLM+solver methods. We hope NL ⇒ Schedule and Mans will serve as a solid foundation for automatic scheduling.</abstract>
    <identifier type="citekey">liao-etal-2026-nl</identifier>
    <location>
      <url>https://aclanthology.org/2026.acl-long.1648/</url>
    </location>
    <!-- Page range of the paper within the host volume -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>35620</start>
        <end>35640</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T NL ⇒ Schedule: Evaluate Multitask Scheduling Capability of Large Language Models
%A Liao, Wenrui
%A Du, Weihong
%A Li, Yi
%A Liang, Hongru
%A Lei, Wenqiang
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-390-6
%F liao-etal-2026-nl
%X Automated schedule generation for multitask from natural language descriptions has huge potential in modern industry. While classic methods bypass language complexities by using pre-formatted matrices, recent LLM+solver approaches introduce new fragilities by relying on solver-specific code generation. This raises critical questions: Can large language models (LLMs) solve this NL ⇒ Schedule task end-to-end well (RQ1)? If the answer is “no”, where do they fall short (RQ2)? And how can their capabilities be enhanced (RQ3)? To answer these questions, we introduce NL ⇒ Schedule, the first benchmark for this task, equipped with a dataset of 240 description-schedule pairs constructed from real-world materials and a rigorous evaluation suite. Our evaluation of nine state-of-the-art LLMs reveals the limitations of different LLMs in procedure grounding and the strengths of advanced LLMs in global planning via local analysis. To address these shortcomings, we propose Mans, a novel multi-agent framework. Extensive experiments show that Mans achieves more robust performance comparable to six state-of-the-art LLM+solver methods. We hope NL ⇒ Schedule and Mans will serve as a solid foundation for automatic scheduling.
%U https://aclanthology.org/2026.acl-long.1648/
%P 35620-35640
Markdown (Informal)
[NL ⇒ Schedule: Evaluate Multitask Scheduling Capability of Large Language Models](https://aclanthology.org/2026.acl-long.1648/) (Liao et al., ACL 2026)
ACL