@inproceedings{strich-etal-2024-improving,
title = "On Improving Repository-Level Code {QA} for Large Language Models",
author = "Strich, Jan and
Schneider, Florian and
Nikishina, Irina and
Biemann, Chris",
editor = "Fu, Xiyan and
Fleisig, Eve",
booktitle = "Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 4: Student Research Workshop)",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.luhme-srw.28/",
doi = "10.18653/v1/2024.acl-srw.28",
pages = "209--244",
abstract = "Large Language Models (LLMs) such as ChatGPT, GitHub Copilot, Llama, or Mistral assist programmers as copilots and knowledge sources to make the coding process faster and more efficient. This paper aims to improve the copilot performance by implementing different self-alignment processes and retrieval-augmented generation (RAG) pipelines, as well as their combination. To test the effectiveness of all approaches, we create a dataset and apply a model-based evaluation, using LLM as a judge. It is designed to check the model`s abilities to understand the source code semantics, the dependency between files, and the overall meta-information about the repository. We also compare our approach with other existing solutions, e.g. ChatGPT-3.5, and evaluate on the existing benchmarks. Code and dataset are available online (https://anonymous.4open.science/r/ma{\_}llm-382D)."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="strich-etal-2024-improving">
    <titleInfo>
      <title>On Improving Repository-Level Code QA for Large Language Models</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Jan</namePart>
      <namePart type="family">Strich</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Florian</namePart>
      <namePart type="family">Schneider</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Irina</namePart>
      <namePart type="family">Nikishina</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Chris</namePart>
      <namePart type="family">Biemann</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-08</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 4: Student Research Workshop)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Xiyan</namePart>
        <namePart type="family">Fu</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Eve</namePart>
        <namePart type="family">Fleisig</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Bangkok, Thailand</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Large Language Models (LLMs) such as ChatGPT, GitHub Copilot, Llama, or Mistral assist programmers as copilots and knowledge sources to make the coding process faster and more efficient. This paper aims to improve the copilot performance by implementing different self-alignment processes and retrieval-augmented generation (RAG) pipelines, as well as their combination. To test the effectiveness of all approaches, we create a dataset and apply a model-based evaluation, using LLM as a judge. It is designed to check the model's abilities to understand the source code semantics, the dependency between files, and the overall meta-information about the repository. We also compare our approach with other existing solutions, e.g. ChatGPT-3.5, and evaluate on the existing benchmarks. Code and dataset are available online (https://anonymous.4open.science/r/ma_llm-382D).</abstract>
    <identifier type="citekey">strich-etal-2024-improving</identifier>
    <identifier type="doi">10.18653/v1/2024.acl-srw.28</identifier>
    <location>
      <url>https://aclanthology.org/2024.luhme-srw.28/</url>
    </location>
    <part>
      <date>2024-08</date>
      <extent unit="page">
        <start>209</start>
        <end>244</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T On Improving Repository-Level Code QA for Large Language Models
%A Strich, Jan
%A Schneider, Florian
%A Nikishina, Irina
%A Biemann, Chris
%Y Fu, Xiyan
%Y Fleisig, Eve
%S Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 4: Student Research Workshop)
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F strich-etal-2024-improving
%X Large Language Models (LLMs) such as ChatGPT, GitHub Copilot, Llama, or Mistral assist programmers as copilots and knowledge sources to make the coding process faster and more efficient. This paper aims to improve the copilot performance by implementing different self-alignment processes and retrieval-augmented generation (RAG) pipelines, as well as their combination. To test the effectiveness of all approaches, we create a dataset and apply a model-based evaluation, using LLM as a judge. It is designed to check the model's abilities to understand the source code semantics, the dependency between files, and the overall meta-information about the repository. We also compare our approach with other existing solutions, e.g. ChatGPT-3.5, and evaluate on the existing benchmarks. Code and dataset are available online (https://anonymous.4open.science/r/ma_llm-382D).
%R 10.18653/v1/2024.acl-srw.28
%U https://aclanthology.org/2024.luhme-srw.28/
%U https://doi.org/10.18653/v1/2024.acl-srw.28
%P 209-244
Markdown (Informal)
[On Improving Repository-Level Code QA for Large Language Models](https://aclanthology.org/2024.luhme-srw.28/) (Strich et al., ACL 2024)
ACL
Jan Strich, Florian Schneider, Irina Nikishina, and Chris Biemann. 2024. On Improving Repository-Level Code QA for Large Language Models. In Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 4: Student Research Workshop), pages 209–244, Bangkok, Thailand. Association for Computational Linguistics.