@inproceedings{huo-etal-2026-reposhapley,
title = "{R}epo{S}hapley: Shapley-Enhanced Context Filtering for Repository-Level Code Completion",
author = "Huo, Yu and
Zeng, Kun and
Zhang, Siyu and
LU, Yuquan and
Yang, Cheng and
Guo, Yifu and
Tang, Xiaoying",
editor = "Liakata, Maria and
Moreira, Viviane P. and
Zhang, Jiajun and
Jurgens, David",
booktitle = "Findings of the {A}ssociation for {C}omputational {L}inguistics: {ACL} 2026",
month = jul,
year = "2026",
address = "San Diego, California, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.findings-acl.505/",
pages = "10390--10412",
ISBN = "979-8-89176-395-1",
abstract = "Repository-level code completion benefits from retrieval-augmented generation (RAG). However, controlling cross-file evidence is difficult because chunk utility is often interaction-dependent: some snippets help only when paired with complementary context, while others harm decoding when they conflict. We propose RepoShapley, a coalition-aware context filtering framework supervised by Shapley-style marginal contributions. Our offline labeling module, ChunkShapley, estimates signed per-chunk effects via teacher-forced probing, feeds them into a lightweight surrogate game that captures saturation and interference, computes exact Shapley values for small retrieval sets, and selects a decoding-optimal coalition through bounded post-verification with the frozen generator. The verified $<$KEEP$>$ / $<$DROP$>$ decisions and retrieval triggers are then distilled into a single model via discrete control tokens. Experiments across benchmarks and backbones show that RepoShapley improves completion quality while reducing harmful context and unnecessary retrieval."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="huo-etal-2026-reposhapley">
<titleInfo>
<title>RepoShapley: Shapley-Enhanced Context Filtering for Repository-Level Code Completion</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yu</namePart>
<namePart type="family">Huo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kun</namePart>
<namePart type="family">Zeng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Siyu</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuquan</namePart>
<namePart type="family">LU</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cheng</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yifu</namePart>
<namePart type="family">Guo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaoying</namePart>
<namePart type="family">Tang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2026</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Liakata</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Viviane</namePart>
<namePart type="given">P</namePart>
<namePart type="family">Moreira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiajun</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Jurgens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-395-1</identifier>
</relatedItem>
<abstract>Repository-level code completion benefits from retrieval-augmented generation (RAG). However, controlling cross-file evidence is difficult because chunk utility is often interaction-dependent: some snippets help only when paired with complementary context, while others harm decoding when they conflict. We propose RepoShapley, a coalition-aware context filtering framework supervised by Shapley-style marginal contributions. Our offline labeling module, ChunkShapley, estimates signed per-chunk effects via teacher-forced probing, feeds them into a lightweight surrogate game that captures saturation and interference, computes exact Shapley values for small retrieval sets, and selects a decoding-optimal coalition through bounded post-verification with the frozen generator. The verified <KEEP> / <DROP> decisions and retrieval triggers are then distilled into a single model via discrete control tokens. Experiments across benchmarks and backbones show that RepoShapley improves completion quality while reducing harmful context and unnecessary retrieval.</abstract>
<identifier type="citekey">huo-etal-2026-reposhapley</identifier>
<location>
<url>https://aclanthology.org/2026.findings-acl.505/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>10390</start>
<end>10412</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T RepoShapley: Shapley-Enhanced Context Filtering for Repository-Level Code Completion
%A Huo, Yu
%A Zeng, Kun
%A Zhang, Siyu
%A LU, Yuquan
%A Yang, Cheng
%A Guo, Yifu
%A Tang, Xiaoying
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Findings of the Association for Computational Linguistics: ACL 2026
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-395-1
%F huo-etal-2026-reposhapley
%X Repository-level code completion benefits from retrieval-augmented generation (RAG). However, controlling cross-file evidence is difficult because chunk utility is often interaction-dependent: some snippets help only when paired with complementary context, while others harm decoding when they conflict. We propose RepoShapley, a coalition-aware context filtering framework supervised by Shapley-style marginal contributions. Our offline labeling module, ChunkShapley, estimates signed per-chunk effects via teacher-forced probing, feeds them into a lightweight surrogate game that captures saturation and interference, computes exact Shapley values for small retrieval sets, and selects a decoding-optimal coalition through bounded post-verification with the frozen generator. The verified <KEEP> / <DROP> decisions and retrieval triggers are then distilled into a single model via discrete control tokens. Experiments across benchmarks and backbones show that RepoShapley improves completion quality while reducing harmful context and unnecessary retrieval.
%U https://aclanthology.org/2026.findings-acl.505/
%P 10390-10412
Markdown (Informal)
[RepoShapley: Shapley-Enhanced Context Filtering for Repository-Level Code Completion](https://aclanthology.org/2026.findings-acl.505/) (Huo et al., Findings 2026)
ACL
- Yu Huo, Kun Zeng, Siyu Zhang, Yuquan LU, Cheng Yang, Yifu Guo, and Xiaoying Tang. 2026. RepoShapley: Shapley-Enhanced Context Filtering for Repository-Level Code Completion. In Findings of the Association for Computational Linguistics: ACL 2026, pages 10390–10412, San Diego, California, United States. Association for Computational Linguistics.