@inproceedings{kammakomati-etal-2025-concodeeval,
title = "{C}on{C}ode{E}val: Evaluating Large Language Models for Code Constraints in Domain-Specific Languages",
author = "Kammakomati, Mehant and
Pimparkhede, Sameer and
Tamilselvam, Srikanth G. and
Kumar, Prince and
Bhattacharyya, Pushpak",
editor = "Rehm, Georg and
Li, Yunyao",
booktitle = "Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 6: Industry Track)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.acl-industry.104/",
doi = "10.18653/v1/2025.acl-industry.104",
pages = "1466--1479",
ISBN = "979-8-89176-288-6",
abstract = "System-level programming is essential for modern enterprise infrastructure, enabling the automation and management of complex systems through declarative code. Developers write this code based on schemas, which themselves are a form of code that defines constraints like data types and required fields. These schemas help ensure operational correctness and smooth integration across systems. However, as enterprise schemas become complex, manually writing code adhering to these constraints becomes challenging for developers. Large Language Models (LLMs) have demonstrated potential in code generation and natural language understanding, particularly in zero-shot and few-shot settings. However, applying LLMs to handle constraints represented in code, essential for system-level programming rather than natural language, has not been explored. Hence, we introduce ConCodeEval, a study across two key dimensions: format and constraint efficacy, with a first-of-its-kind benchmark involving two novel experiments for code constraints across five representations (JSON, YAML, XML, Python, and natural language). Our findings suggest that conscious choice of representations can lead to optimal use of LLMs in enterprise use cases involving constraints. Nonetheless, LLMs continue to struggle significantly with code constraints, motivating the need for innovation in this direction."
}

<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kammakomati-etal-2025-concodeeval">
<titleInfo>
<title>ConCodeEval: Evaluating Large Language Models for Code Constraints in Domain-Specific Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mehant</namePart>
<namePart type="family">Kammakomati</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sameer</namePart>
<namePart type="family">Pimparkhede</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Srikanth</namePart>
<namePart type="given">G</namePart>
<namePart type="family">Tamilselvam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Prince</namePart>
<namePart type="family">Kumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pushpak</namePart>
<namePart type="family">Bhattacharyya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 6: Industry Track)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Georg</namePart>
<namePart type="family">Rehm</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yunyao</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-288-6</identifier>
</relatedItem>
<abstract>System-level programming is essential for modern enterprise infrastructure, enabling the automation and management of complex systems through declarative code. Developers write this code based on schemas, which themselves are a form of code that defines constraints like data types and required fields. These schemas help ensure operational correctness and smooth integration across systems. However, as enterprise schemas become complex, manually writing code adhering to these constraints becomes challenging for developers. Large Language Models (LLMs) have demonstrated potential in code generation and natural language understanding, particularly in zero-shot and few-shot settings. However, applying LLMs to handle constraints represented in code, essential for system-level programming rather than natural language, has not been explored. Hence, we introduce ConCodeEval, a study across two key dimensions: format and constraint efficacy, with a first-of-its-kind benchmark involving two novel experiments for code constraints across five representations (JSON, YAML, XML, Python, and natural language). Our findings suggest that conscious choice of representations can lead to optimal use of LLMs in enterprise use cases involving constraints. Nonetheless, LLMs continue to struggle significantly with code constraints, motivating the need for innovation in this direction.</abstract>
<identifier type="citekey">kammakomati-etal-2025-concodeeval</identifier>
<identifier type="doi">10.18653/v1/2025.acl-industry.104</identifier>
<location>
<url>https://aclanthology.org/2025.acl-industry.104/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>1466</start>
<end>1479</end>
</extent>
</part>
</mods>
</modsCollection>

%0 Conference Proceedings
%T ConCodeEval: Evaluating Large Language Models for Code Constraints in Domain-Specific Languages
%A Kammakomati, Mehant
%A Pimparkhede, Sameer
%A Tamilselvam, Srikanth G.
%A Kumar, Prince
%A Bhattacharyya, Pushpak
%Y Rehm, Georg
%Y Li, Yunyao
%S Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 6: Industry Track)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-288-6
%F kammakomati-etal-2025-concodeeval
%X System-level programming is essential for modern enterprise infrastructure, enabling the automation and management of complex systems through declarative code. Developers write this code based on schemas, which themselves are a form of code that defines constraints like data types and required fields. These schemas help ensure operational correctness and smooth integration across systems. However, as enterprise schemas become complex, manually writing code adhering to these constraints becomes challenging for developers. Large Language Models (LLMs) have demonstrated potential in code generation and natural language understanding, particularly in zero-shot and few-shot settings. However, applying LLMs to handle constraints represented in code, essential for system-level programming rather than natural language, has not been explored. Hence, we introduce ConCodeEval, a study across two key dimensions: format and constraint efficacy, with a first-of-its-kind benchmark involving two novel experiments for code constraints across five representations (JSON, YAML, XML, Python, and natural language). Our findings suggest that conscious choice of representations can lead to optimal use of LLMs in enterprise use cases involving constraints. Nonetheless, LLMs continue to struggle significantly with code constraints, motivating the need for innovation in this direction.
%R 10.18653/v1/2025.acl-industry.104
%U https://aclanthology.org/2025.acl-industry.104/
%U https://doi.org/10.18653/v1/2025.acl-industry.104
%P 1466-1479

Markdown (Informal)
[ConCodeEval: Evaluating Large Language Models for Code Constraints in Domain-Specific Languages](https://aclanthology.org/2025.acl-industry.104/) (Kammakomati et al., ACL 2025)

ACL
Mehant Kammakomati, Sameer Pimparkhede, Srikanth G. Tamilselvam, Prince Kumar, and Pushpak Bhattacharyya. 2025. ConCodeEval: Evaluating Large Language Models for Code Constraints in Domain-Specific Languages. In Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 6: Industry Track), pages 1466–1479, Vienna, Austria. Association for Computational Linguistics.
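
The abstract's central notion of a "code constraint" (a schema that fixes data types and required fields which generated code must respect) can be made concrete with a small sketch. The schema, field names, and checker below are hypothetical illustrations, not artifacts from the ConCodeEval benchmark; they only show the kind of required-field and type constraints the paper evaluates LLMs against.

```python
# Minimal, hypothetical sketch of schema-style code constraints
# (required fields + data types); names are illustrative only.

# Hypothetical schema for a deployment config, not from the paper.
SCHEMA = {
    "required": ["name", "replicas"],
    "types": {"name": str, "replicas": int, "debug": bool},
}

def violations(instance: dict, schema: dict) -> list[str]:
    """Return constraint violations of `instance` under `schema`."""
    errors = []
    # Required-field constraints.
    for field in schema["required"]:
        if field not in instance:
            errors.append(f"missing required field: {field}")
    # Data-type constraints.
    for field, expected in schema["types"].items():
        if field in instance and not isinstance(instance[field], expected):
            errors.append(
                f"{field}: expected {expected.__name__}, "
                f"got {type(instance[field]).__name__}"
            )
    return errors

# A candidate config that breaks both kinds of constraint.
candidate = {"name": "web", "debug": "yes"}
print(violations(candidate, SCHEMA))
# ['missing required field: replicas', 'debug: expected bool, got str']
```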