@inproceedings{dutta-etal-2024-rar,
title = "{RAR}: Retrieval-augmented retrieval for code generation in low resource languages",
author = "Dutta, Avik and
Singh, Mukul and
Verbruggen, Gust and
Gulwani, Sumit and
Le, Vu",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.emnlp-main.1199",
doi = "10.18653/v1/2024.emnlp-main.1199",
pages = "21506--21515",
abstract = "Language models struggle in generating code for low-resource programming languages, since these are underrepresented in training data. Either examples or documentation are commonly used for improved code generation. We propose to use both types of information together and present retrieval augmented retrieval (RAR) as a two-step method for selecting relevant examples and documentation. Experiments on three low-resource languages (Power Query M, OfficeScript and Excel formulas) show that RAR outperforms independently example and grammar retrieval (+2.81{--}26.14{\%}). Interestingly, we show that two-step retrieval selects better examples and documentation when used independently as well.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="dutta-etal-2024-rar">
<titleInfo>
<title>RAR: Retrieval-augmented retrieval for code generation in low resource languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Avik</namePart>
<namePart type="family">Dutta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mukul</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gust</namePart>
<namePart type="family">Verbruggen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sumit</namePart>
<namePart type="family">Gulwani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vu</namePart>
<namePart type="family">Le</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaser</namePart>
<namePart type="family">Al-Onaizan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Language models struggle in generating code for low-resource programming languages, since these are underrepresented in training data. Either examples or documentation are commonly used for improved code generation. We propose to use both types of information together and present retrieval augmented retrieval (RAR) as a two-step method for selecting relevant examples and documentation. Experiments on three low-resource languages (Power Query M, OfficeScript and Excel formulas) show that RAR outperforms independently example and grammar retrieval (+2.81–26.14%). Interestingly, we show that two-step retrieval selects better examples and documentation when used independently as well.</abstract>
<identifier type="citekey">dutta-etal-2024-rar</identifier>
<identifier type="doi">10.18653/v1/2024.emnlp-main.1199</identifier>
<location>
<url>https://aclanthology.org/2024.emnlp-main.1199</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>21506</start>
<end>21515</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T RAR: Retrieval-augmented retrieval for code generation in low resource languages
%A Dutta, Avik
%A Singh, Mukul
%A Verbruggen, Gust
%A Gulwani, Sumit
%A Le, Vu
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F dutta-etal-2024-rar
%X Language models struggle in generating code for low-resource programming languages, since these are underrepresented in training data. Either examples or documentation are commonly used for improved code generation. We propose to use both types of information together and present retrieval augmented retrieval (RAR) as a two-step method for selecting relevant examples and documentation. Experiments on three low-resource languages (Power Query M, OfficeScript and Excel formulas) show that RAR outperforms independently example and grammar retrieval (+2.81–26.14%). Interestingly, we show that two-step retrieval selects better examples and documentation when used independently as well.
%R 10.18653/v1/2024.emnlp-main.1199
%U https://aclanthology.org/2024.emnlp-main.1199
%U https://doi.org/10.18653/v1/2024.emnlp-main.1199
%P 21506-21515
Markdown (Informal)
[RAR: Retrieval-augmented retrieval for code generation in low resource languages](https://aclanthology.org/2024.emnlp-main.1199) (Dutta et al., EMNLP 2024)
ACL
Avik Dutta, Mukul Singh, Gust Verbruggen, Sumit Gulwani, and Vu Le. 2024. RAR: Retrieval-augmented retrieval for code generation in low resource languages. In Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing, pages 21506–21515, Miami, Florida, USA. Association for Computational Linguistics.
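
The abstract above describes RAR as a two-step retrieval scheme: first retrieve examples similar to the query, then use the query together with those examples to retrieve documentation (grammar) entries, and feed both to the code-generation model. The snippet below is only a minimal illustrative sketch of that idea; the token-overlap scorer, the query-expansion step, the toy corpora, and all names are assumptions, not the authors' implementation.

```python
from collections import Counter

def overlap(query_tokens, cand_tokens):
    """Token-overlap score between a query and a candidate text (assumed scorer)."""
    q, c = Counter(query_tokens), Counter(cand_tokens)
    return sum(min(q[t], c[t]) for t in q)

def retrieve(query, corpus, k):
    """Return the top-k corpus strings ranked by token overlap with the query."""
    q_tokens = query.lower().split()
    ranked = sorted(corpus, key=lambda text: overlap(q_tokens, text.lower().split()),
                    reverse=True)
    return ranked[:k]

def rar(query, examples, documentation, k_examples=2, k_docs=2):
    """Two-step retrieval: retrieve examples first, then retrieve documentation
    using the query expanded with the retrieved examples."""
    top_examples = retrieve(query, examples, k_examples)
    expanded_query = " ".join([query] + top_examples)  # step 2 conditions on step-1 hits
    top_docs = retrieve(expanded_query, documentation, k_docs)
    return top_examples, top_docs

if __name__ == "__main__":
    # Toy Power Query M snippets and documentation lines, for illustration only.
    examples = [
        'Table.SelectRows(Source, each [Age] > 30)',
        'Table.AddColumn(Source, "Total", each [Price] * [Qty])',
    ]
    docs = [
        "Table.SelectRows(table, condition) keeps the rows for which condition holds",
        "Table.AddColumn(table, newColumnName, columnGenerator) adds a computed column",
    ]
    shots, grammar = rar("filter rows where Age is greater than 30", examples, docs)
    print(shots)
    print(grammar)
```

In a full pipeline, the retrieved examples and documentation would be placed in the generation prompt for the target low-resource language; the paper's actual retrievers and corpora differ from this toy sketch.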