@inproceedings{zhang-etal-2023-syntax,
title = "Syntax-Aware Retrieval Augmented Code Generation",
author = "Zhang, Xiangyu and
Zhou, Yu and
Yang, Guang and
Chen, Taolue",
editor = "Bouamor, Houda and
Pino, Juan and
Bali, Kalika",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2023",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.findings-emnlp.90/",
doi = "10.18653/v1/2023.findings-emnlp.90",
pages = "1291--1302",
abstract = "Neural code generation models are nowadays widely adopted to generate code from natural language descriptions automatically. Recently, pre-trained neural models equipped with token-level retrieval capabilities have exhibited great potentials in neural machine translation. However, applying them directly to code generation experience challenges: the use of the retrieval-based mechanism inevitably introduces extraneous noise to the generation process, resulting in even syntactically incorrect code. Computationally, such models necessitate frequent searches of the cached datastore, which turns out to be time-consuming. To address these issues, we propose $k$NN-TRANX, a token-level retrieval augmented code generation method. $k$NN-TRANX allows for searches in smaller datastores tailored for the code generation task. It leverages syntax constraints for the retrieval of datastores, which reduces the impact of retrieve noise. We evaluate $k$NN-TRANX on two public datasets and the experimental results confirm the effectiveness of our approach."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhang-etal-2023-syntax">
<titleInfo>
<title>Syntax-Aware Retrieval Augmented Code Generation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Xiangyu</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yu</namePart>
<namePart type="family">Zhou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Guang</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Taolue</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2023</title>
</titleInfo>
<name type="personal">
<namePart type="given">Houda</namePart>
<namePart type="family">Bouamor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juan</namePart>
<namePart type="family">Pino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kalika</namePart>
<namePart type="family">Bali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Neural code generation models are now widely used to generate code automatically from natural language descriptions. Recently, pre-trained neural models equipped with token-level retrieval have shown great potential in neural machine translation. However, applying them directly to code generation poses challenges: the retrieval mechanism inevitably introduces extraneous noise into the generation process, which can even yield syntactically incorrect code. Computationally, such models require frequent searches of a cached datastore, which is time-consuming. To address these issues, we propose kNN-TRANX, a token-level retrieval-augmented code generation method. kNN-TRANX searches smaller datastores tailored to the code generation task and leverages syntax constraints during datastore retrieval, which reduces the impact of retrieval noise. We evaluate kNN-TRANX on two public datasets, and the experimental results confirm the effectiveness of our approach.</abstract>
<identifier type="citekey">zhang-etal-2023-syntax</identifier>
<identifier type="doi">10.18653/v1/2023.findings-emnlp.90</identifier>
<location>
<url>https://aclanthology.org/2023.findings-emnlp.90/</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>1291</start>
<end>1302</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Syntax-Aware Retrieval Augmented Code Generation
%A Zhang, Xiangyu
%A Zhou, Yu
%A Yang, Guang
%A Chen, Taolue
%Y Bouamor, Houda
%Y Pino, Juan
%Y Bali, Kalika
%S Findings of the Association for Computational Linguistics: EMNLP 2023
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F zhang-etal-2023-syntax
%X Neural code generation models are now widely used to generate code automatically from natural language descriptions. Recently, pre-trained neural models equipped with token-level retrieval have shown great potential in neural machine translation. However, applying them directly to code generation poses challenges: the retrieval mechanism inevitably introduces extraneous noise into the generation process, which can even yield syntactically incorrect code. Computationally, such models require frequent searches of a cached datastore, which is time-consuming. To address these issues, we propose kNN-TRANX, a token-level retrieval-augmented code generation method. kNN-TRANX searches smaller datastores tailored to the code generation task and leverages syntax constraints during datastore retrieval, which reduces the impact of retrieval noise. We evaluate kNN-TRANX on two public datasets, and the experimental results confirm the effectiveness of our approach.
%R 10.18653/v1/2023.findings-emnlp.90
%U https://aclanthology.org/2023.findings-emnlp.90/
%U https://doi.org/10.18653/v1/2023.findings-emnlp.90
%P 1291-1302
Markdown (Informal)
[Syntax-Aware Retrieval Augmented Code Generation](https://aclanthology.org/2023.findings-emnlp.90/) (Zhang et al., Findings 2023)
ACL
Xiangyu Zhang, Yu Zhou, Guang Yang, and Taolue Chen. 2023. Syntax-Aware Retrieval Augmented Code Generation. In Findings of the Association for Computational Linguistics: EMNLP 2023, pages 1291–1302, Singapore. Association for Computational Linguistics.
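
The mechanism the abstract describes follows the token-level kNN-MT pattern: at each decoding step, the model's next-token distribution is interpolated with a distribution induced by nearest-neighbour lookup in a cached datastore of (decoder hidden state, target token) pairs, and syntax constraints discard retrieved tokens the grammar would reject. The following is a minimal NumPy sketch of that general idea, not the authors' released implementation; the datastore layout, the boolean syntax mask, and the interpolation weight `lam` are illustrative assumptions.

```python
import numpy as np

def knn_augmented_next_token(hidden, model_probs, datastore_keys,
                             datastore_vals, syntax_mask,
                             k=8, lam=0.5, temperature=10.0):
    """Blend the model's next-token distribution with a kNN retrieval
    distribution, keeping only syntactically admissible retrieved tokens.

    hidden:         decoder hidden state at this step, shape (d,)
    model_probs:    model's next-token distribution, shape (V,)
    datastore_keys: cached hidden states, shape (N, d)
    datastore_vals: target token id stored with each key, shape (N,)
    syntax_mask:    boolean array over the vocabulary, True where the
                    grammar allows the token at this step (assumed given)
    lam:            interpolation weight between kNN and model (illustrative)
    """
    # L2 distance from the query state to every cached key
    dists = np.linalg.norm(datastore_keys - hidden, axis=1)
    neighbours = np.argsort(dists)[:k]

    # Convert distances to weights: closer neighbours get larger weights
    weights = np.exp(-dists[neighbours] / temperature)

    # Scatter the neighbour weights onto their target tokens, dropping
    # neighbours whose token violates the syntax constraint (this filtering
    # is what reduces "retrieval noise" in the abstract's terms)
    knn_probs = np.zeros_like(model_probs)
    for idx, w in zip(neighbours, weights):
        token = datastore_vals[idx]
        if syntax_mask[token]:
            knn_probs[token] += w

    if knn_probs.sum() > 0.0:
        knn_probs /= knn_probs.sum()
        return lam * knn_probs + (1.0 - lam) * model_probs
    # No admissible neighbour retrieved: fall back to the model alone
    return model_probs
```

Restricting the lookup to syntax-admissible entries is one plausible reading of the noise reduction the abstract claims, and keeping the datastore small and task-specific addresses the search cost it mentions.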