@inproceedings{zhang-etal-2025-skeleton,
title = "Skeleton-Guided-Translation: A Benchmarking Framework for Code Repository Translation with Fine-Grained Quality Evaluation",
author = "Zhang, Xing and
Wen, Jiaheng and
Yang, Fangkai and
Kang, Yu and
Zhao, Pu and
Wang, Junhao and
Wang, Maoquan and
Huang, Yufan and
Fu, Shengyu and
Nallipogu, Elsie and
Lin, Qingwei and
Dang, Yingnong and
Rajmohan, Saravan and
Zhang, Dongmei",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2025",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-emnlp.986/",
pages = "18187--18198",
ISBN = "979-8-89176-335-7",
abstract = "Code translation benchmarks are essential for evaluating the accuracy and efficiency of LLM-based systems. Existing benchmarks mainly target individual functions, overlooking repository-level challenges like intermodule coherence and dependency management. Recent repository-level efforts exist, but suffer from poor maintainability and coarse evaluation granularity. We introduce Skeleton-Guided-Translation, a framework for benchmarking Java-to-C{\#} translation at the repository level, featuring fine-grained quality evaluation. It follows a two-step process: first translating repository ``skeletons'', then refining the entire repository guided by these skeletons. Based on this, we present TRANSREPO-BENCH, the first test-driven benchmark of high-quality Java repositories paired with C{\#} skeletons, unit tests, and build configurations. Our adaptive unit tests support multiple and incremental translations without manual tuning, enhancing automation and scalability. We also propose fine-grained metrics that evaluate translation quality per test case, overcoming limitations of binary metrics in distinguishing build failures. Evaluations using TRANSREPO-BENCH reveal issues like broken cross-file references, showing that our structured approach reduces dependency errors and preserves interface consistency."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhang-etal-2025-skeleton">
<titleInfo>
<title>Skeleton-Guided-Translation: A Benchmarking Framework for Code Repository Translation with Fine-Grained Quality Evaluation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Xing</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiaheng</namePart>
<namePart type="family">Wen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fangkai</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yu</namePart>
<namePart type="family">Kang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pu</namePart>
<namePart type="family">Zhao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junhao</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maoquan</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yufan</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shengyu</namePart>
<namePart type="family">Fu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elsie</namePart>
<namePart type="family">Nallipogu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Qingwei</namePart>
<namePart type="family">Lin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yingnong</namePart>
<namePart type="family">Dang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Saravan</namePart>
<namePart type="family">Rajmohan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dongmei</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2025</title>
</titleInfo>
<name type="personal">
<namePart type="given">Christos</namePart>
<namePart type="family">Christodoulopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tanmoy</namePart>
<namePart type="family">Chakraborty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carolyn</namePart>
<namePart type="family">Rose</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Violet</namePart>
<namePart type="family">Peng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-335-7</identifier>
</relatedItem>
<abstract>Code translation benchmarks are essential for evaluating the accuracy and efficiency of LLM-based systems. Existing benchmarks mainly target individual functions, overlooking repository-level challenges like intermodule coherence and dependency management. Recent repository-level efforts exist, but suffer from poor maintainability and coarse evaluation granularity. We introduce Skeleton-Guided-Translation, a framework for benchmarking Java-to-C# translation at the repository level, featuring fine-grained quality evaluation. It follows a two-step process: first translating repository “skeletons”, then refining the entire repository guided by these skeletons. Based on this, we present TRANSREPO-BENCH, the first test-driven benchmark of high-quality Java repositories paired with C# skeletons, unit tests, and build configurations. Our adaptive unit tests support multiple and incremental translations without manual tuning, enhancing automation and scalability. We also propose fine-grained metrics that evaluate translation quality per test case, overcoming limitations of binary metrics in distinguishing build failures. Evaluations using TRANSREPO-BENCH reveal issues like broken cross-file references, showing that our structured approach reduces dependency errors and preserves interface consistency.</abstract>
<identifier type="citekey">zhang-etal-2025-skeleton</identifier>
<location>
<url>https://aclanthology.org/2025.findings-emnlp.986/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>18187</start>
<end>18198</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Skeleton-Guided-Translation: A Benchmarking Framework for Code Repository Translation with Fine-Grained Quality Evaluation
%A Zhang, Xing
%A Wen, Jiaheng
%A Yang, Fangkai
%A Kang, Yu
%A Zhao, Pu
%A Wang, Junhao
%A Wang, Maoquan
%A Huang, Yufan
%A Fu, Shengyu
%A Nallipogu, Elsie
%A Lin, Qingwei
%A Dang, Yingnong
%A Rajmohan, Saravan
%A Zhang, Dongmei
%Y Christodoulopoulos, Christos
%Y Chakraborty, Tanmoy
%Y Rose, Carolyn
%Y Peng, Violet
%S Findings of the Association for Computational Linguistics: EMNLP 2025
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-335-7
%F zhang-etal-2025-skeleton
%X Code translation benchmarks are essential for evaluating the accuracy and efficiency of LLM-based systems. Existing benchmarks mainly target individual functions, overlooking repository-level challenges like intermodule coherence and dependency management. Recent repository-level efforts exist, but suffer from poor maintainability and coarse evaluation granularity. We introduce Skeleton-Guided-Translation, a framework for benchmarking Java-to-C# translation at the repository level, featuring fine-grained quality evaluation. It follows a two-step process: first translating repository “skeletons”, then refining the entire repository guided by these skeletons. Based on this, we present TRANSREPO-BENCH, the first test-driven benchmark of high-quality Java repositories paired with C# skeletons, unit tests, and build configurations. Our adaptive unit tests support multiple and incremental translations without manual tuning, enhancing automation and scalability. We also propose fine-grained metrics that evaluate translation quality per test case, overcoming limitations of binary metrics in distinguishing build failures. Evaluations using TRANSREPO-BENCH reveal issues like broken cross-file references, showing that our structured approach reduces dependency errors and preserves interface consistency.
%U https://aclanthology.org/2025.findings-emnlp.986/
%P 18187-18198
Markdown (Informal)
[Skeleton-Guided-Translation: A Benchmarking Framework for Code Repository Translation with Fine-Grained Quality Evaluation](https://aclanthology.org/2025.findings-emnlp.986/) (Zhang et al., Findings 2025)
ACL
- Xing Zhang, Jiaheng Wen, Fangkai Yang, Yu Kang, Pu Zhao, Junhao Wang, Maoquan Wang, Yufan Huang, Shengyu Fu, Elsie Nallipogu, Qingwei Lin, Yingnong Dang, Saravan Rajmohan, and Dongmei Zhang. 2025. Skeleton-Guided-Translation: A Benchmarking Framework for Code Repository Translation with Fine-Grained Quality Evaluation. In Findings of the Association for Computational Linguistics: EMNLP 2025, pages 18187–18198, Suzhou, China. Association for Computational Linguistics.