@inproceedings{hahn-han-2026-query4regex,
title = "{Q}uery4{R}egex: Verifiable Regex Transformation through Formal Operations from {NL} and {DSL} Queries",
author = "Hahn, Joonghyuk and
Han, Yo-Sub",
editor = "Demberg, Vera and
Inui, Kentaro and
Marquez, Llu{\'i}s",
booktitle = "Findings of the {A}ssociation for {C}omputational {L}inguistics: {EACL} 2026",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.findings-eacl.331/",
pages = "6297--6305",
ISBN = "979-8-89176-386-9",
abstract = "While large language models (LLMs) excel at generating structured data, such as code, their ability to precisely manipulate it based on instructions remains relatively under-explored. Regular expressions (regexes), critical in practice, are challenging to manipulate. Crucially, the correctness of transformations can be mathematically verified, making them exceptionally well-suited for measuring the symbolic reasoning of LLMs. We introduce Query4Regex, a new benchmark for evaluating verifiable transformations on regexes. Our benchmark tests two query formats: natural language instructions and a program-like domain-specific language (DSL) that specifies the sequence of operations. We evaluate a range of LLMs, verifying semantic correctness through rigorous deterministic finite automata (DFA) equivalence testing. Our empirical studies reveal: 1) the formal DSL significantly outperforms natural language, achieving up to 6.74{\%}p accuracy gains on average. 2) Performance for both formats degrades sharply as compositional complexity increases, highlighting a core challenge in multi-step reasoning. 3) Models often generate plausible but unparsable outputs. Even among parsable outputs, semantic errors remain common, making failures difficult to detect without formal verification. Query4Regex provides a robust framework for analyzing the gap between LLMs' linguistic fluency and their symbolic reasoning, paving the way for more reliable and verifiable manipulation of formal languages. Our code is available at https://github.com/peer0/Query4Regex."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hahn-han-2026-query4regex">
<titleInfo>
<title>Query4Regex: Verifiable Regex Transformation through Formal Operations from NL and DSL Queries</title>
</titleInfo>
<name type="personal">
<namePart type="given">Joonghyuk</namePart>
<namePart type="family">Hahn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yo-Sub</namePart>
<namePart type="family">Han</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EACL 2026</title>
</titleInfo>
<name type="personal">
<namePart type="given">Vera</namePart>
<namePart type="family">Demberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kentaro</namePart>
<namePart type="family">Inui</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lluís</namePart>
<namePart type="family">Marquez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-386-9</identifier>
</relatedItem>
<abstract>While large language models (LLMs) excel at generating structured data, such as code, their ability to precisely manipulate it based on instructions remains relatively under-explored. Regular expressions (regexes), critical in practice, are challenging to manipulate. Crucially, the correctness of transformations can be mathematically verified, making them exceptionally well-suited for measuring the symbolic reasoning of LLMs. We introduce Query4Regex, a new benchmark for evaluating verifiable transformations on regexes. Our benchmark tests two query formats: natural language instructions and a program-like domain-specific language (DSL) that specifies the sequence of operations. We evaluate a range of LLMs, verifying semantic correctness through rigorous deterministic finite automata (DFA) equivalence testing. Our empirical studies reveal: 1) the formal DSL significantly outperforms natural language, achieving up to 6.74%p accuracy gains on average. 2) Performance for both formats degrades sharply as compositional complexity increases, highlighting a core challenge in multi-step reasoning. 3) Models often generate plausible but unparsable outputs. Even among parsable outputs, semantic errors remain common, making failures difficult to detect without formal verification. Query4Regex provides a robust framework for analyzing the gap between LLMs’ linguistic fluency and their symbolic reasoning, paving the way for more reliable and verifiable manipulation of formal languages. Our code is available at https://github.com/peer0/Query4Regex.</abstract>
<identifier type="citekey">hahn-han-2026-query4regex</identifier>
<location>
<url>https://aclanthology.org/2026.findings-eacl.331/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>6297</start>
<end>6305</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Query4Regex: Verifiable Regex Transformation through Formal Operations from NL and DSL Queries
%A Hahn, Joonghyuk
%A Han, Yo-Sub
%Y Demberg, Vera
%Y Inui, Kentaro
%Y Marquez, Lluís
%S Findings of the Association for Computational Linguistics: EACL 2026
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-386-9
%F hahn-han-2026-query4regex
%X While large language models (LLMs) excel at generating structured data, such as code, their ability to precisely manipulate it based on instructions remains relatively under-explored. Regular expressions (regexes), critical in practice, are challenging to manipulate. Crucially, the correctness of transformations can be mathematically verified, making them exceptionally well-suited for measuring the symbolic reasoning of LLMs. We introduce Query4Regex, a new benchmark for evaluating verifiable transformations on regexes. Our benchmark tests two query formats: natural language instructions and a program-like domain-specific language (DSL) that specifies the sequence of operations. We evaluate a range of LLMs, verifying semantic correctness through rigorous deterministic finite automata (DFA) equivalence testing. Our empirical studies reveal: 1) the formal DSL significantly outperforms natural language, achieving up to 6.74%p accuracy gains on average. 2) Performance for both formats degrades sharply as compositional complexity increases, highlighting a core challenge in multi-step reasoning. 3) Models often generate plausible but unparsable outputs. Even among parsable outputs, semantic errors remain common, making failures difficult to detect without formal verification. Query4Regex provides a robust framework for analyzing the gap between LLMs’ linguistic fluency and their symbolic reasoning, paving the way for more reliable and verifiable manipulation of formal languages. Our code is available at https://github.com/peer0/Query4Regex.
%U https://aclanthology.org/2026.findings-eacl.331/
%P 6297-6305
Markdown (Informal)
[Query4Regex: Verifiable Regex Transformation through Formal Operations from NL and DSL Queries](https://aclanthology.org/2026.findings-eacl.331/) (Hahn & Han, Findings 2026)
ACL