@inproceedings{sun-etal-2024-transcoder,
title = "{T}rans{C}oder: Towards Unified Transferable Code Representation Learning Inspired by Human Skills",
author = "Sun, Qiushi and
Chen, Nuo and
Wang, Jianing and
Gao, Ming and
Li, Xiang",
editor = "Calzolari, Nicoletta and
Kan, Min-Yen and
Hoste, Veronique and
Lenci, Alessandro and
Sakti, Sakriani and
Xue, Nianwen",
booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.lrec-main.1453",
pages = "16713--16726",
abstract = "Code pre-trained models (CodePTMs) have recently demonstrated a solid capacity to process various code intelligence tasks, e.g., code clone detection, code translation, and code summarization. The current mainstream method that deploys these models to downstream tasks is to fine-tune them on individual tasks, which is generally costly and needs sufficient data for large models. To tackle the issue, in this paper, we present TransCoder, a unified Transferable fine-tuning strategy for Code representation learning. Inspired by human inherent skills of knowledge generalization, TransCoder drives the model to learn better code-related knowledge like human programmers. Specifically, we employ a tunable prefix encoder to first capture cross-task and cross-language transferable knowledge, subsequently applying the acquired knowledge for optimized downstream adaptation. Besides, our approach confers benefits for tasks with minor training sample sizes and languages with smaller corpora, underscoring versatility and efficacy. Extensive experiments conducted on representative datasets clearly demonstrate that our method can lead to superior performance on various code-related tasks and encourage mutual reinforcement, especially in low-resource scenarios. Our codes are available at https://github.com/QiushiSun/TransCoder.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sun-etal-2024-transcoder">
<titleInfo>
<title>TransCoder: Towards Unified Transferable Code Representation Learning Inspired by Human Skills</title>
</titleInfo>
<name type="personal">
<namePart type="given">Qiushi</namePart>
<namePart type="family">Sun</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nuo</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jianing</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ming</namePart>
<namePart type="family">Gao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiang</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronique</namePart>
<namePart type="family">Hoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Code pre-trained models (CodePTMs) have recently demonstrated a solid capacity to process various code intelligence tasks, e.g., code clone detection, code translation, and code summarization. The current mainstream method that deploys these models to downstream tasks is to fine-tune them on individual tasks, which is generally costly and needs sufficient data for large models. To tackle the issue, in this paper, we present TransCoder, a unified Transferable fine-tuning strategy for Code representation learning. Inspired by human inherent skills of knowledge generalization, TransCoder drives the model to learn better code-related knowledge like human programmers. Specifically, we employ a tunable prefix encoder to first capture cross-task and cross-language transferable knowledge, subsequently applying the acquired knowledge for optimized downstream adaptation. Besides, our approach confers benefits for tasks with minor training sample sizes and languages with smaller corpora, underscoring versatility and efficacy. Extensive experiments conducted on representative datasets clearly demonstrate that our method can lead to superior performance on various code-related tasks and encourage mutual reinforcement, especially in low-resource scenarios. Our codes are available at https://github.com/QiushiSun/TransCoder.</abstract>
<identifier type="citekey">sun-etal-2024-transcoder</identifier>
<location>
<url>https://aclanthology.org/2024.lrec-main.1453</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>16713</start>
<end>16726</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T TransCoder: Towards Unified Transferable Code Representation Learning Inspired by Human Skills
%A Sun, Qiushi
%A Chen, Nuo
%A Wang, Jianing
%A Gao, Ming
%A Li, Xiang
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F sun-etal-2024-transcoder
%X Code pre-trained models (CodePTMs) have recently demonstrated a solid capacity to process various code intelligence tasks, e.g., code clone detection, code translation, and code summarization. The current mainstream method that deploys these models to downstream tasks is to fine-tune them on individual tasks, which is generally costly and needs sufficient data for large models. To tackle the issue, in this paper, we present TransCoder, a unified Transferable fine-tuning strategy for Code representation learning. Inspired by human inherent skills of knowledge generalization, TransCoder drives the model to learn better code-related knowledge like human programmers. Specifically, we employ a tunable prefix encoder to first capture cross-task and cross-language transferable knowledge, subsequently applying the acquired knowledge for optimized downstream adaptation. Besides, our approach confers benefits for tasks with minor training sample sizes and languages with smaller corpora, underscoring versatility and efficacy. Extensive experiments conducted on representative datasets clearly demonstrate that our method can lead to superior performance on various code-related tasks and encourage mutual reinforcement, especially in low-resource scenarios. Our codes are available at https://github.com/QiushiSun/TransCoder.
%U https://aclanthology.org/2024.lrec-main.1453
%P 16713-16726
Markdown (Informal)
[TransCoder: Towards Unified Transferable Code Representation Learning Inspired by Human Skills](https://aclanthology.org/2024.lrec-main.1453) (Sun et al., LREC-COLING 2024)
ACL