% ACL Anthology record for the ManCC paper (Findings of ACL 2026).
% Text outside an entry is ignored by BibTeX, so these notes are safe here.
% Title words that must keep their capitals under sentence-casing styles are
% brace-protected as whole words (ManCC, Manchu, Classical, Chinese, ACL).
@inproceedings{wang-etal-2026-mancc,
  title     = {{ManCC}: A Task-Anchored Benchmark for {Manchu}--{Classical} {Chinese} Cross-Lingual Modeling},
  author    = {Wang, Meiqi and
               Sun, Xiaoxin and
               Wang, Dongjie and
               Yu, Ruixin and
               Heng, Xiantao and
               Wang, Shuo and
               Huang, Zhen and
               Zhao, Peng and
               Wang, Suhua and
               Yin, Minghao},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.findings-acl.1359/},
  pages     = {27271--27292},
  isbn      = {979-8-89176-395-1},
  abstract  = {Research in cross-lingual modeling for historical and extremely low-resource languages is hindered by the absence of standardized evaluation benchmarks. To address this, we present ManCC{---}the first task-anchored benchmark for Manchu{--}Classical Chinese translation. ManCC consists of a high-quality parallel corpus of 16,627 sentence pairs, derived from the Qing-dynasty historical text Manwen Laodang-Taizong, and a reproducible evaluation protocol that combines automatic metrics (BLEU and chrF) with a three-dimensional human assessment (fidelity, fluency, linguistic normativity). Through systematic evaluation across three model families (non-pretrained, multilingual pretrained, and large language models), we find that linguistic differences significantly influence performance, broader language coverage in multilingual pretraining facilitates low-resource transfer, and automatic metrics often fail to capture essential errors in historical translation{---}underscoring the necessity of human evaluation. ManCC not only provides foundational resources for Manchu{--}Classical Chinese translation but also establishes a diagnosable, reproducible platform for cross-lingual modeling of historical low-resource languages.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for citekey wang-etal-2026-mancc. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="wang-etal-2026-mancc">
    <titleInfo>
      <title>ManCC: A Task-Anchored Benchmark for Manchu–Classical Chinese Cross-Lingual Modeling</title>
    </titleInfo>
    <!-- Authors -->
    <name type="personal">
      <namePart type="given">Meiqi</namePart>
      <namePart type="family">Wang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Xiaoxin</namePart>
      <namePart type="family">Sun</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Dongjie</namePart>
      <namePart type="family">Wang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ruixin</namePart>
      <namePart type="family">Yu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Xiantao</namePart>
      <namePart type="family">Heng</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Shuo</namePart>
      <namePart type="family">Wang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Zhen</namePart>
      <namePart type="family">Huang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Peng</namePart>
      <namePart type="family">Zhao</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Suhua</namePart>
      <namePart type="family">Wang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Minghao</namePart>
      <namePart type="family">Yin</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors, publisher and ISBN -->
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: ACL 2026</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Maria</namePart>
        <namePart type="family">Liakata</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Viviane</namePart>
        <namePart type="given">P</namePart>
        <namePart type="family">Moreira</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jiajun</namePart>
        <namePart type="family">Zhang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">David</namePart>
        <namePart type="family">Jurgens</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, California, United States</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-395-1</identifier>
    </relatedItem>
    <abstract>Research in cross-lingual modeling for historical and extremely low-resource languages is hindered by the absence of standardized evaluation benchmarks. To address this, we present ManCC—the first task-anchored benchmark for Manchu–Classical Chinese translation. ManCC consists of a high-quality parallel corpus of 16,627 sentence pairs, derived from the Qing-dynasty historical text Manwen Laodang-Taizong, and a reproducible evaluation protocol that combines automatic metrics (BLEU and chrF) with a three-dimensional human assessment (fidelity, fluency, linguistic normativity). Through systematic evaluation across three model families (non-pretrained, multilingual pretrained, and large language models), we find that linguistic differences significantly influence performance, broader language coverage in multilingual pretraining facilitates low-resource transfer, and automatic metrics often fail to capture essential errors in historical translation—underscoring the necessity of human evaluation. ManCC not only provides foundational resources for Manchu–Classical Chinese translation but also establishes a diagnosable, reproducible platform for cross-lingual modeling of historical low-resource languages.</abstract>
    <identifier type="citekey">wang-etal-2026-mancc</identifier>
    <location>
      <url>https://aclanthology.org/2026.findings-acl.1359/</url>
    </location>
    <!-- Position of this paper within the host volume -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>27271</start>
        <end>27292</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T ManCC: A Task-Anchored Benchmark for Manchu–Classical Chinese Cross-Lingual Modeling
%A Wang, Meiqi
%A Sun, Xiaoxin
%A Wang, Dongjie
%A Yu, Ruixin
%A Heng, Xiantao
%A Wang, Shuo
%A Huang, Zhen
%A Zhao, Peng
%A Wang, Suhua
%A Yin, Minghao
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Findings of the Association for Computational Linguistics: ACL 2026
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-395-1
%F wang-etal-2026-mancc
%X Research in cross-lingual modeling for historical and extremely low-resource languages is hindered by the absence of standardized evaluation benchmarks. To address this, we present ManCC—the first task-anchored benchmark for Manchu–Classical Chinese translation. ManCC consists of a high-quality parallel corpus of 16,627 sentence pairs, derived from the Qing-dynasty historical text Manwen Laodang-Taizong, and a reproducible evaluation protocol that combines automatic metrics (BLEU and chrF) with a three-dimensional human assessment (fidelity, fluency, linguistic normativity). Through systematic evaluation across three model families (non-pretrained, multilingual pretrained, and large language models), we find that linguistic differences significantly influence performance, broader language coverage in multilingual pretraining facilitates low-resource transfer, and automatic metrics often fail to capture essential errors in historical translation—underscoring the necessity of human evaluation. ManCC not only provides foundational resources for Manchu–Classical Chinese translation but also establishes a diagnosable, reproducible platform for cross-lingual modeling of historical low-resource languages.
%U https://aclanthology.org/2026.findings-acl.1359/
%P 27271-27292
Markdown (Informal)
[ManCC: A Task-Anchored Benchmark for Manchu–Classical Chinese Cross-Lingual Modeling](https://aclanthology.org/2026.findings-acl.1359/) (Wang et al., Findings 2026)
ACL
- Meiqi Wang, Xiaoxin Sun, Dongjie Wang, Ruixin Yu, Xiantao Heng, Shuo Wang, Zhen Huang, Peng Zhao, Suhua Wang, and Minghao Yin. 2026. ManCC: A Task-Anchored Benchmark for Manchu–Classical Chinese Cross-Lingual Modeling. In Findings of the Association for Computational Linguistics: ACL 2026, pages 27271–27292, San Diego, California, United States. Association for Computational Linguistics.