@inproceedings{zhang-etal-2025-fine,
title = "Fine-Grained Features-based Code Search for Precise Query-Code Matching",
author = "Zhang, Xinting and
Cheng, Mengqiu and
Wang, Mengzhen and
Gong, Songwen and
Xie, Jiayuan and
Cai, Yi and
Li, Qing",
editor = "Rambow, Owen and
Wanner, Leo and
Apidianaki, Marianna and
Al-Khalifa, Hend and
Di Eugenio, Barbara and
Schockaert, Steven",
booktitle = "Proceedings of the 31st International Conference on Computational Linguistics",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.coling-main.482/",
pages = "7229--7238",
abstract = "Code search aims to quickly locate target code snippets from databases using natural language queries, which promotes code reusability. Existing methods can effectively obtain aligned token-level and query word-level features. However, these studies usually represent the semantics of code and query by averaging the features of each token and word respectively, which makes it difficult to accurately capture the code details that are closely related to the query. To address this issue, we propose a fine-grained code search model that consists of a cross-modal encoder, a mapping layer, and a classification layer. Specifically, we utilize a pre-trained model, GraphCodeBERT, in the cross-modal encoder to align features. In the mapping layer, we introduce a co-attention network to capture the fine-grained interactions between code and query, ensuring a model can precisely identify key code segments relevant to the query. Finally, in the classification layer, we incorporate instruction learning techniques that leverage contextual reasoning to improve the accuracy of query-code matching. Experimental results show that our proposed model significantly outperforms existing methods across multiple programming language datasets."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhang-etal-2025-fine">
<titleInfo>
<title>Fine-Grained Features-based Code Search for Precise Query-Code Matching</title>
</titleInfo>
<name type="personal">
<namePart type="given">Xinting</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mengqiu</namePart>
<namePart type="family">Cheng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mengzhen</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Songwen</namePart>
<namePart type="family">Gong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiayuan</namePart>
<namePart type="family">Xie</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yi</namePart>
<namePart type="family">Cai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Qing</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 31st International Conference on Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Owen</namePart>
<namePart type="family">Rambow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leo</namePart>
<namePart type="family">Wanner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marianna</namePart>
<namePart type="family">Apidianaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hend</namePart>
<namePart type="family">Al-Khalifa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Barbara</namePart>
<namePart type="family">Di Eugenio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Schockaert</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Code search aims to quickly locate target code snippets from databases using natural language queries, which promotes code reusability. Existing methods can effectively obtain aligned token-level and query word-level features. However, these studies usually represent the semantics of code and query by averaging the features of each token and word respectively, which makes it difficult to accurately capture the code details that are closely related to the query. To address this issue, we propose a fine-grained code search model that consists of a cross-modal encoder, a mapping layer, and a classification layer. Specifically, we utilize a pre-trained model, GraphCodeBERT, in the cross-modal encoder to align features. In the mapping layer, we introduce a co-attention network to capture the fine-grained interactions between code and query, ensuring a model can precisely identify key code segments relevant to the query. Finally, in the classification layer, we incorporate instruction learning techniques that leverage contextual reasoning to improve the accuracy of query-code matching. Experimental results show that our proposed model significantly outperforms existing methods across multiple programming language datasets.</abstract>
<identifier type="citekey">zhang-etal-2025-fine</identifier>
<location>
<url>https://aclanthology.org/2025.coling-main.482/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>7229</start>
<end>7238</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Fine-Grained Features-based Code Search for Precise Query-Code Matching
%A Zhang, Xinting
%A Cheng, Mengqiu
%A Wang, Mengzhen
%A Gong, Songwen
%A Xie, Jiayuan
%A Cai, Yi
%A Li, Qing
%Y Rambow, Owen
%Y Wanner, Leo
%Y Apidianaki, Marianna
%Y Al-Khalifa, Hend
%Y Di Eugenio, Barbara
%Y Schockaert, Steven
%S Proceedings of the 31st International Conference on Computational Linguistics
%D 2025
%8 January
%I Association for Computational Linguistics
%C Abu Dhabi, UAE
%F zhang-etal-2025-fine
%X Code search aims to quickly locate target code snippets from databases using natural language queries, which promotes code reusability. Existing methods can effectively obtain aligned token-level and query word-level features. However, these studies usually represent the semantics of code and query by averaging the features of each token and word respectively, which makes it difficult to accurately capture the code details that are closely related to the query. To address this issue, we propose a fine-grained code search model that consists of a cross-modal encoder, a mapping layer, and a classification layer. Specifically, we utilize a pre-trained model, GraphCodeBERT, in the cross-modal encoder to align features. In the mapping layer, we introduce a co-attention network to capture the fine-grained interactions between code and query, ensuring a model can precisely identify key code segments relevant to the query. Finally, in the classification layer, we incorporate instruction learning techniques that leverage contextual reasoning to improve the accuracy of query-code matching. Experimental results show that our proposed model significantly outperforms existing methods across multiple programming language datasets.
%U https://aclanthology.org/2025.coling-main.482/
%P 7229-7238
Markdown (Informal)
[Fine-Grained Features-based Code Search for Precise Query-Code Matching](https://aclanthology.org/2025.coling-main.482/) (Zhang et al., COLING 2025)
ACL